In [None]:
pip install requests
pip install lxml
pip install pandas
pip install openpyxl
pip install cobra
pip install cobrakbase
# for windows only, download the file from https://www.lfd.uci.edu/~gohlke/pythonlibs/#pyeda firstly
# pip install pyeda-0.28.0-cp37-cp37m-win_amd64.whl  
pip install modelseedpy
pip install reframed


In [1]:
org_name = 'mko'
org_full_name = 'Methylomonas_koyamae'


In [1]:
org_name = 'mbry'
org_full_name = 'Methylocystis_bryophila'

In [18]:
from reframed import load_cbmodel
model = load_cbmodel(org_name+'_model\\'+org_name+'_add_from_modelseed_4.xml', flavor='fbc2')

In [19]:
model.summary()

Metabolites:
c0 1080
e0 104

Reactions:
enzymatic 0
transport 123
exchange 109
sink 0
other 922


# build model from modelseed

## Initialize new model

In [2]:
import cobra

new_model = cobra.Model(org_full_name)
new_model

0,1
Name,Methylocystis_bryophila
Memory address,15a8dc6a220
Number of metabolites,0
Number of reactions,0
Number of genes,0
Number of groups,0
Objective expression,0
Compartments,


## KEGG ID mapping to modelseed

In [3]:
import pandas as pd

#org_name = 'mbry'
df_reaction = pd.read_excel(org_name+'_model\\'+org_name+'_clean_kegg_reaction.xlsx', index_col=1)  
titlename= ['entry_name','pathway_title','reaction_type','reaction_substrates','reaction_products']
df_keggID_modelseedID = pd.read_csv('reaction_keggID_modelseedID.txt', sep='\t')
mapping_reaction = pd.merge(df_reaction,df_keggID_modelseedID,how='left',on='Kegg_ID').drop_duplicates(subset=['Kegg_ID'], keep='first')[['ModelSeed_ID','Kegg_ID']+titlename]
mapping_reaction.to_excel(org_name+'_model\\'+org_name+'_mapping_reaction.xlsx', sheet_name='Sheet1', header=True)

print('done!')

done!


## add c0 reactions from ModelSeed Database

In [3]:
def add_ms_reaction(
    model,
    rxn_id,
    modelseed,
    compartment = 'c0',
    direction="forward",
):  # Xinli modified from modelseedpy.core.mseditorapi

    modelseed_reaction = modelseed.get_seed_reaction(rxn_id)
    reaction_stoich = modelseed_reaction.cstoichiometry
    cobra_reaction = cobra.Reaction(rxn_id+'_'+ compartment)
    cobra_reaction.name = str(modelseed_reaction.data["name"])+'_'+compartment

    metabolites_to_add = {}
    for metabolite, stoich in reaction_stoich.items():
        id = metabolite[0]
        compound = modelseed.get_seed_compound(id)
        if int(metabolite[1]) == 0:
            compartment_string = 'c0'
        elif int(metabolite[1]) == 1:
            compartment_string = 'e0'
        else:
            compartment_string = ''
            print(str(id)+' compartment wrong')

        if compound.formula == compound.formula:
            formula = compound.formula
        else:
            formula = ''
        if compound.data['charge']:
            charge = compound.data['charge']
        else:
            charge = None
    
        metabolites_to_add[
            cobra.Metabolite(
                id+'_'+compartment_string, name=compound.data["name"]+'_'+compartment_string, compartment=compartment_string, formula = formula, charge = charge
            )
        ] = stoich

    cobra_reaction.add_metabolites(metabolites_to_add)
    cobra_reaction.reaction

    if direction == "reversible":
        cobra_reaction.lower_bound = -1000
    elif direction == "backward":
        cobra_reaction.lower_bound = -1000
        cobra_reaction.upper_bound = 0

    model.add_reactions([cobra_reaction])

In [4]:
import cobra
import pandas as pd

mapping_reaction = pd.read_excel(org_name+'_model\\'+org_name+'_mapping_reaction.xlsx', index_col=0)  
modelseed_id_list = list(mapping_reaction['ModelSeed_ID'].dropna(axis = 0, how = 'any'))  # reaction ID that need to be added


import modelseedpy
modelseed_path = 'C:\\Users\\vickenlee\\ModelSEEDDatabase'
modelseed = modelseedpy.biochem.from_local(modelseed_path)

print('======================== \n %d reactions begin to added \n'%len(modelseed_id_list))

i,j = 0,0
for rxn_id in modelseed_id_list:
    reaction_type = mapping_reaction.query('ModelSeed_ID == "'+ rxn_id +'"')['reaction_type'].values[0]
    if reaction_type == 'reversible':
        direction = 'reversible'
    elif reaction_type == 'irreversible':
        direction = 'forward'
    try:
        add_ms_reaction(new_model, rxn_id = rxn_id, modelseed = modelseed, compartment = 'c0', direction = direction)
        print(rxn_id+' added')
        i+=1
    except:
        print(rxn_id+' added wrong !!!')
        j+=1

print('\n %d / %d reactions added successfully \n ====================='%(i,i+j))
'''
for metabolite in new_model.metabolites:
    metaid = [meta for meta in metabolite.id.split('_') if meta.startswith('cpd')]
    if metaid:
        ms_metabolite = modelseed.get_seed_compound(metaid[0])
        if  ms_metabolite.formula == ms_metabolite.formula:
            metabolite.formula = ms_metabolite.formula
        metabolite.charge = ms_metabolite.data['charge']
'''
cobra.io.write_sbml_model(new_model,filename=org_name+'_model\\'+org_name+'_add_from_modelseed_1.xml')

 920 reactions begin to added 

rxn15116 added
rxn00506 added
rxn00536 added
rxn00543 added
rxn00011 added
rxn02342 added
rxn01871 added
rxn00148 added
rxn00459 added
rxn01106 added
rxn00781 added
rxn00747 added
rxn15493 added
rxn15364 added
rxn00704 added
rxn02380 added
rxn01169 added
rxn01977 added
rxn15249 added
rxn15694 added
rxn01100 added
rxn15989 added
rxn00247 added
rxn00175 added
rxn00507 added
rxn15298 added
rxn00151 added
rxn00147 added
rxn00441 added
rxn02376 added
rxn01872 added
rxn00285 added
rxn00199 added
rxn01387 added
rxn00799 added
rxn01388 added
rxn00974 added
rxn00256 added
rxn00248 added
rxn06109 added
rxn00770 added
rxn01200 added
rxn00777 added
rxn01116 added
rxn15271 added
rxn01115 added
rxn01476 added
rxn01975 added
rxn29919 added
rxn00778 added
rxn08647 added
rxn01187 added
rxn11040 added
rxn00213 added
rxn00211 added
rxn06201 added
rxn15081 added
rxn15270 added
rxn01329 added
rxn00642 added
rxn00641 added
rxn01870 added
rxn02438 added
rxn02314 added
rxn00214

#### check balance

In [5]:
def check_balance(reaction,H_metabolite=None):
    try:
        feedback = reaction.check_mass_balance()
        if feedback :
            print(reaction.id + ': ' + str(feedback))
            print('      '+ reaction.reaction)
            if H_metabolite:
                if 'H' in feedback and feedback.get('H') == feedback.get('charge'):
                    H_to_add={H_metabolite:-1*feedback.get('H')}
                    reaction.add_metabolites(H_to_add)
                    print('      H is added')
                    return(check_balance(reaction,H_metabolite))
            return int(0)
        else:
            #print(reaction.id + ': banlance')
            #print('      '+ reaction.reaction)
            return int(1)
    except:
        print(reaction.id + ': error')
        print('      '+ reaction.reaction)
        return int(0)



import cobra
import pandas as pd

H_metabolite = new_model.metabolites.get_by_id('cpd00067_c0')

i=0
for reaction in new_model.reactions:
    i=i+check_balance(reaction,H_metabolite)
print('After add H+, %d / %d reactions are balanced'%(i,len(new_model.reactions)))

cobra.io.write_sbml_model(new_model,filename=org_name+'_model\\'+org_name+'_add_from_modelseed_2.xml')

rxn15989_c0: {'H': -2.0}
      cpd00363_c0 + 2.0 cpd19499_c0 <=> cpd00071_c0 + 2.0 cpd19500_c0
rxn05929_c0: {'C': 6.0, 'H': 10.0, 'O': 5.0}
      cpd00001_c0 --> cpd00108_c0
rxn07579_c0: {'charge': 2.0, 'C': -11.0, 'H': -21.0, 'N': -2.0, 'O': -7.0, 'P': -1.0, 'R': -1.0}
      cpd00004_c0 + cpd00067_c0 + cpd14940_c0 <=> cpd00003_c0 + cpd12458_c0
rxn07578_c0: {'C': 11.0, 'H': 21.0, 'O': 7.0, 'charge': -2.0, 'N': 2.0, 'P': 1.0}
      cpd14939_c0 <=> cpd00001_c0 + cpd14940_c0
rxn07576_c0: {'charge': -1.0}
      cpd00067_c0 + cpd11476_c0 + cpd11492_c0 --> cpd00011_c0 + cpd11493_c0 + cpd14938_c0
rxn07577_c0: {'charge': 2.0, 'C': -11.0, 'H': -21.0, 'N': -2.0, 'O': -7.0, 'P': -1.0}
      cpd00005_c0 + cpd00067_c0 + cpd14938_c0 <=> cpd00006_c0 + cpd14939_c0
rxn05350_c0: {'charge': -1.0}
      cpd00067_c0 + cpd11472_c0 + cpd11492_c0 --> cpd00011_c0 + cpd11490_c0 + cpd11493_c0
rxn05336_c0: {'charge': -3.0}
      cpd00006_c0 + 2.0 cpd00067_c0 + cpd11481_c0 <=> cpd00005_c0 + cpd11485_c0
rxn05323_c0

#### print unbalanced

In [6]:
for reaction in new_model.reactions:
    if check_balance(reaction)==0:
        print('           https://modelseed.org/solr/reactions/select?wt=json&q=id:'+reaction.id.replace('_c0',''))
        for meta, sto in reaction.metabolites.items():
            print('           '+meta.id+'    '+ str(meta.formula)+'    '+str(meta.charge))

        #new_model.metabolites.get_by_id('C00042_c0').charge


rxn15989_c0: {'H': -2.0}
      cpd00363_c0 + 2.0 cpd19499_c0 <=> cpd00071_c0 + 2.0 cpd19500_c0
           https://modelseed.org/solr/reactions/select?wt=json&q=id:rxn15989
           cpd19499_c0        None
           cpd19500_c0        None
           cpd00363_c0    C2H6O    None
           cpd00071_c0    C2H4O    None
rxn05929_c0: {'C': 6.0, 'H': 10.0, 'O': 5.0}
      cpd00001_c0 --> cpd00108_c0
           https://modelseed.org/solr/reactions/select?wt=json&q=id:rxn05929
           cpd00108_c0    C6H12O6    None
           cpd00001_c0    H2O    None
rxn07579_c0: {'charge': 2.0, 'C': -11.0, 'H': -21.0, 'N': -2.0, 'O': -7.0, 'P': -1.0, 'R': -1.0}
      cpd00004_c0 + cpd00067_c0 + cpd14940_c0 <=> cpd00003_c0 + cpd12458_c0
           https://modelseed.org/solr/reactions/select?wt=json&q=id:rxn07579
           cpd14940_c0    C29H54N2O8PRS    -2
           cpd12458_c0    C18H35OS    None
           cpd00004_c0    C21H27N7O14P2    -2
           cpd00067_c0    H    1
           cpd00003_c0  

In [7]:
new_model

0,1
Name,Methylocystis_bryophila
Memory address,2c27f7bad30
Number of metabolites,1017
Number of reactions,920
Number of genes,0
Number of groups,0
Objective expression,0
Compartments,c0


## add bio reaction from Drift model

In [4]:
import cobra

drift_model_path = org_name+'_model\\ModelSeed_model_DSMZ_21852.xml'
#drift_model_path = org_name+'_model\\ModelSeed_model_'+org_full_name+'.xml'
drift_model = cobra.io.read_sbml_model(drift_model_path)

In [5]:
def add_reaction_from_driftModel(
    new_model,
    drift_model,
    rxn_id,
): 
    reaction_drift = drift_model.reactions.get_by_id(rxn_id)
    cobra_reaction = cobra.Reaction(rxn_id)
    cobra_reaction.name = reaction_drift.name
    cobra_reaction.lower_bound = reaction_drift.lower_bound
    cobra_reaction.upper_bound = reaction_drift.upper_bound
    
    metabolites_to_add = {}
    for metabolite, stoich in reaction_drift.metabolites.items():
        id = metabolite.id
        name = metabolite.name
        charge = metabolite.charge
        compartment = metabolite.compartment
        
        if  metabolite.formula == metabolite.formula:
            formula = metabolite.formula
        else:
            formula = ''
        metabolites_to_add[
                cobra.Metabolite(id, name=name, compartment=compartment, formula=formula, charge=charge)
            ] = stoich

    
    cobra_reaction.add_metabolites(metabolites_to_add)
    new_model.add_reactions([cobra_reaction])

In [6]:
new_model = cobra.io.read_sbml_model(org_name+'_model\\'+org_name+'_add_from_modelseed_2.xml')
add_reaction_from_driftModel(new_model,drift_model,'bio1_biomass')
new_model.objective = new_model.problem.Objective(new_model.reactions.get_by_id('bio1_biomass').flux_expression, direction='max')

cobra.io.write_sbml_model(new_model,filename=org_name+'_model\\'+org_name+'_add_from_modelseed_3.xml')

No objective coefficients in model. Unclear what should be optimized


## add trans reactions

In [7]:
trans_reactions = []
ex_reactions = []
for reaction in drift_model.reactions:
    if reaction.id.startswith('EX_'):
        ex_reactions.append(reaction.id)
        continue
    for meta, sto in reaction.metabolites.items():
        if meta.id.endswith('_e0'):
            trans_reactions.append(reaction.id)
            break
print(len(trans_reactions))

123


In [8]:
for id in trans_reactions:
    print(id)
    rxn = drift_model.reactions.get_by_id(id)
    for meta, sto in rxn.metabolites.items():
        name = drift_model.metabolites.get_by_id(meta.id).name
        print(meta.id+'\t'+ name+'\t'+ str(sto))
    print('\n')

rxn05561_c0
cpd00067_e0	H+_e0	-1.0
cpd00106_e0	Fumarate_e0	-1.0
cpd00067_c0	H+_c0	1.0
cpd00106_c0	Fumarate_c0	1.0


rxn05625_c0
cpd00067_e0	H+_e0	-1.0
cpd00075_e0	Nitrite_e0	-1.0
cpd00067_c0	H+_c0	1.0
cpd00075_c0	Nitrite_c0	1.0


rxn08734_c0
cpd00067_c0	H+_c0	-2.0
cpd11640_c0	H2_c0	-1.0
cpd15560_c0	Ubiquinone-8_c0	-1.0
cpd00067_e0	H+_e0	2.0
cpd15561_c0	Ubiquinol-8_c0	1.0


rxn05319_c0
cpd00001_e0	H2O_e0	-1.0
cpd00001_c0	H2O_c0	1.0


rxn09295_c0
cpd00004_c0	NADH_c0	-1.0
cpd00006_c0	NADP_c0	-1.0
cpd00067_e0	H+_e0	-2.0
cpd00003_c0	NAD_c0	1.0
cpd00005_c0	NADPH_c0	1.0
cpd00067_c0	H+_c0	2.0


rxn05145_c0
cpd00001_c0	H2O_c0	-1.0
cpd00002_c0	ATP_c0	-1.0
cpd00009_e0	Phosphate_e0	-1.0
cpd00008_c0	ADP_c0	1.0
cpd00009_c0	Phosphate_c0	2.0
cpd00067_c0	H+_c0	1.0


rxn10122_c0
cpd00004_c0	NADH_c0	-1.0
cpd00067_c0	H+_c0	-4.5
cpd15560_c0	Ubiquinone-8_c0	-1.0
cpd00003_c0	NAD_c0	1.0
cpd00067_e0	H+_e0	3.5
cpd15561_c0	Ubiquinol-8_c0	1.0


rxn08173_c0
cpd00008_c0	ADP_c0	-1.0
cpd00009_c0	Phosphate_c0	-1.0
cpd

In [9]:
sub_trans_rxn = ['rxn05226_c0', 'rxn10344_c0','rxn05516_c0', 'rxn10481_c0', 'rxn08715_c0', 'rxn05517_c0']   # for methylocystis
add_trans_reactions = [rxn.replace('_c0','').replace('_e0','') for rxn in trans_reactions if not rxn in sub_trans_rxn]
add_trans_reactions.append('rxn13109')

In [10]:
new_model = cobra.io.read_sbml_model(org_name+'_model\\'+org_name+'_add_from_modelseed_3.xml')

import modelseedpy
modelseed_path = 'C:\\Users\\vickenlee\\ModelSEEDDatabase'
modelseed = modelseedpy.biochem.from_local(modelseed_path)

print('======================== \n %d reactions begin to added \n'%len(add_trans_reactions))

i,j = 0,0
for rxn_id in add_trans_reactions:
    direction = 'reversible'
    try:
        add_ms_reaction(new_model, rxn_id = rxn_id, modelseed = modelseed, compartment = 'c0', direction = direction)
        print(rxn_id+' added')
        i+=1
    except:
        print(rxn_id+' added wrong !!!')
        j+=1

print('\n %d / %d reactions added successfully \n ====================='%(i,i+j))

cobra.io.write_sbml_model(new_model,filename=org_name+'_model\\'+org_name+'_add_from_modelseed_4.xml')

 118 reactions begin to added 

rxn05561 added
rxn05625 added
rxn08734 added
rxn05319 added
rxn09295 added
rxn05145 added
rxn10122 added
rxn08173 added
rxn05528 added
rxn05209 added
rxn09003 added
rxn09001 added
rxn05627 added
rxn05468 added
rxn05467 added
rxn14427 added
rxn05602 added
rxn05654 added
rxn05542 added
rxn05545 added
rxn11268 added
rxn05541 added
rxn08192 added
rxn05161 added
rxn05539 added
rxn05466 added
rxn05174 added
rxn05585 added
rxn05595 added
rxn05535 added
rxn05219 added
rxn05150 added
rxn05533 added
rxn05552 added
rxn05172 added
rxn12850 added
rxn08628 added
rxn05175 added
rxn12848 added
rxn05163 added
rxn05547 added
rxn05682 added
rxn05515 added
rxn05148 added
rxn05527 added
rxn08924 added
rxn05619 added
rxn08762 added
rxn09657 added
rxn05594 added
rxn05512 added
rxn05312 added
rxn05315 added
rxn12851 added
rxn05153 added
rxn09193 added
rxn05195 added
rxn05579 added
rxn13892 added
rxn05618 added
rxn08241 added
rxn05544 added
rxn05603 added
rxn05599 added
rxn05537

### add exchange reactions  (donnot do it at this moment)

In [11]:
new_model = cobra.io.read_sbml_model(org_name+'_model\\'+org_name+'_add_from_modelseed_4.xml')
new_model

0,1
Name,Methylocystis_bryophila
Memory address,15ab7e53250
Number of metabolites,1177
Number of reactions,1039
Number of genes,0
Number of groups,0
Objective expression,1.0*bio1_biomass - 1.0*bio1_biomass_reverse_6e711
Compartments,"c0, e0"


In [19]:
import re
ex_rxn = []
dm_rxn = []
for rxn in drift_model.boundary:
    print(rxn.id)
    if 'cpd' in rxn.id:
        match = re.search(r'cpd\w*', rxn.id)
        id = match.group()
        if 'EX' in rxn.id:
            ex_rxn.append(id)
        if 'DM' in rxn.id:
            dm_rxn.append(id)
        name = drift_model.metabolites.get_by_id(match.group()).name
        print(match.group()+'\t'+name)
        print('\n')

rxn13783_c0
rxn13784_c0
rxn13782_c0
EX_cpd00322_e0
cpd00322_e0	L-Isoleucine_e0


EX_cpd00011_e0
cpd00011_e0	CO2_e0


EX_cpd11593_e0
cpd11593_e0	ala-L-asp-L_e0


EX_cpd00075_e0
cpd00075_e0	Nitrite_e0


EX_cpd15606_e0
cpd15606_e0	Gly-Tyr_e0


EX_cpd00036_e0
cpd00036_e0	Succinate_e0


EX_cpd00099_e0
cpd00099_e0	Cl-_e0


EX_cpd00058_e0
cpd00058_e0	Cu2+_e0


EX_cpd00220_e0
cpd00220_e0	Riboflavin_e0


EX_cpd11592_e0
cpd11592_e0	gly-glu-L_e0


EX_cpd00104_e0
cpd00104_e0	BIOT_e0


EX_cpd10516_e0
cpd10516_e0	fe3_e0


EX_cpd00119_e0
cpd00119_e0	L-Histidine_e0


EX_cpd11575_e0
cpd11575_e0	MOPS_e0


EX_cpd00033_e0
cpd00033_e0	Glycine_e0


EX_cpd00215_e0
cpd00215_e0	Pyridoxal_e0


EX_cpd00531_e0
cpd00531_e0	Hg2+_e0


EX_cpd00159_e0
cpd00159_e0	L-Lactate_e0


EX_cpd00132_e0
cpd00132_e0	L-Asparagine_e0


EX_cpd04099_e0
cpd04099_e0	Phosphonate_e0


EX_cpd00017_e0
cpd00017_e0	S-Adenosyl-L-methionine_e0


EX_cpd00393_e0
cpd00393_e0	Folate_e0


EX_cpd00254_e0
cpd00254_e0	Mg_e0


EX_cpd00035_e0
cpd00035_e

In [20]:
add_rxn = ['rxn13783','rxn13784','rxn13782']
for rxn_id in add_rxn: 
    direction = 'reversible'
    add_ms_reaction(new_model, rxn_id = rxn_id, modelseed = modelseed, compartment = 'c', direction = direction)

In [21]:
for rxn in new_model.reactions:
    if rxn.compartments == 'c0':
        rxn.compartments = 'c'
        rxn.id = rxn.id.replace('_c0','_c')
        rxn.name = rxn.name.replace('_c0','_c')
    elif rxn.compartments == 'e0':
        rxn.compartments = 'e'
        rxn.id = rxn.id.replace('_e0','_e')
        rxn.name = rxn.name.replace('_e0','_e')

for meta in new_model.metabolites:
    if meta.compartment == 'c0':
        meta.compartment = 'c'
        meta.id = meta.id.replace('_c0','_c')
        meta.name = meta.name.replace('_c0','_c')
    elif meta.compartment == 'e0':
        meta.compartment = 'e'
        meta.id = meta.id.replace('_e0','_e')
        meta.name = meta.name.replace('_e0','_e')

In [22]:
new_model.compartments={'c': 'cytosol', 'e': 'external'}

sub_ex_rxn = ['cpd00027_e0','cpd00531_e0','cpd04097_e0','cpd01012_e0']  # for methylocystis
add_ex_reactions = [rxn for rxn in ex_rxn if not rxn in sub_ex_rxn]
add_ex_reactions.append('cpd01024_e0')
for cpd_id in add_ex_reactions:
    try:
        new_model.metabolites.get_by_id(cpd_id[:-1])
        new_model.add_boundary(new_model.metabolites.get_by_id(cpd_id[:-1]), type="exchange")
        new_model.reactions.get_by_id('EX_'+cpd_id[:-1]).annotation = {}
    except:
        pass
for cpd_id in dm_rxn:
    new_model.add_boundary(new_model.metabolites.get_by_id(cpd_id[:-1]), type="demand")
    
cobra.io.write_sbml_model(new_model,filename=org_name+'_model\\'+org_name+'_add_from_modelseed_5.xml')
new_model

0,1
Name,Methylocystis_bryophila
Memory address,2c20aa48700
Number of metabolites,1177
Number of reactions,1142
Number of genes,0
Number of groups,0
Objective expression,1.0*bio1_biomass - 1.0*bio1_biomass_reverse_6e711
Compartments,"c, e"


## add reactions and metabolites note and exchange reactions

https://narrative.kbase.us/#catalog/apps/kb_uploadmethods/import_file_as_fba_model_from_staging

## add gene

In [14]:
import cobra
new_model = cobra.io.read_sbml_model(org_name+'_model\\'+org_name+'_add_from_modelseed_5.xml')
new_model

0,1
Name,Methylocystis_bryophila
Memory address,15ac3b98ac0
Number of metabolites,1177
Number of reactions,1142
Number of genes,0
Number of groups,0
Objective expression,1.0*bio1_biomass - 1.0*bio1_biomass_reverse_6e711
Compartments,"c, e"


In [22]:
import cobra
new_model = cobra.io.read_sbml_model(org_name+'_model\\'+org_name+'_add_from_modelseed_4.xml_gapfill_model.xml')
new_model.id = org_full_name+'_model'
new_model.compartments={'c0': 'Cytosol', 'e0': 'Extracellular'}
new_model

'' is not a valid SBML 'SId'.


0,1
Name,Methylocystis_bryophila_model
Memory address,15b7b2a9100
Number of metabolites,1177
Number of reactions,1139
Number of genes,0
Number of groups,0
Objective expression,1.0*bio1_biomass - 1.0*bio1_biomass_reverse_6e711
Compartments,"Cytosol, Extracellular"


In [20]:
import re
import pandas as pd

mapping_reaction = pd.read_excel(org_name+'_model\\'+org_name+'_mapping_reaction.xlsx', index_col=0)  
gene_tag= pd.read_csv(org_full_name+'_genome\\'+org_name+'_gene_tag.csv', index_col=False,sep='\t')
old_to_new = {}
for i in range(len(gene_tag)):
    old_to_new[gene_tag.iloc[i,4]] = gene_tag.iloc[i,1]


indrf = 0
m,n = 0,0
for reaction in new_model.reactions:
    drf_rxn = None
    try:
        drf_rxn = drift_model.reactions.get_by_id(reaction.id)
    except:
        pass
    if drf_rxn:
        reaction.gene_reaction_rule = drf_rxn.gene_reaction_rule.replace('gene-','')
        indrf +=1
        continue

    skip =0
    match = re.search(r'rxn\d*', reaction.id)
    if match:
        rxnid = match.group()
        try:
            genes = mapping_reaction.query('ModelSeed_ID == "'+ rxnid +'"')['entry_name'].values[0]
        except:
            skip = 1
        
    if skip == 1:
        continue
    else:
        oldtags = re.findall(r"B1812_\d+", genes)
        try:
            newtags = [old_to_new.get(oldtag) for oldtag in oldtags]
            result = " or ".join(newtags)
            m+=1
        except:
            result = " or ".join(oldtags)
            n+=1

        gene_list_str = '( '+result+' )'
        reaction.gene_reaction_rule = gene_list_str


    if (indrf+m+n)%100 == 0:
        print('%d / %d reactions genes added succesfully'%(indrf+m+n,len(new_model.reactions)))

print('\n--------------gene reaction rule added finished----------------')
print('There are %d reactions genes added from drift model.'%indrf)
print('There are %d reactions genes added from mapping table with new tags.'%m)
print('There are %d reactions genes added from mapping table with old tags.'%n)

cobra.io.write_sbml_model(new_model,filename=org_name+'_model\\'+org_name+'_add_from_modelseed_6.xml')


200 / 1139 reactions genes added succesfully
300 / 1139 reactions genes added succesfully
400 / 1139 reactions genes added succesfully
600 / 1139 reactions genes added succesfully
800 / 1139 reactions genes added succesfully
900 / 1139 reactions genes added succesfully

--------------gene reaction rule added finished----------------
There are 737 reactions genes added from drift model.
There are 399 reactions genes added from mapping table with new tags.
There are 1 reactions genes added from mapping table with old tags.


old tag B1812_22155 is BLAST to new tag DSM21852_01220
so fixed this gene in the xml file manually

string __DASH__c0 was also deleted

In [25]:
new_model = cobra.io.read_sbml_model(org_name+'_model\\'+org_name+'_add_from_modelseed_6.xml')
new_model


0,1
Name,Methylocystis_bryophila_model
Memory address,15b7e77d3d0
Number of metabolites,1177
Number of reactions,1139
Number of genes,803
Number of groups,0
Objective expression,1.0*bio1_biomass - 1.0*bio1_biomass_reverse_6e711
Compartments,"Cytosol, Extracellular"


## add trans and ex reaction from drift model

In [45]:
new_model = cobra.io.read_sbml_model(org_name+'_model\\'+org_name+'_add_from_modelseed_3.xml')

trans_reactions = []
ex_reactions = []
for reaction in drift_model.reactions:
    if reaction.id.startswith('EX_'):
        ex_reactions.append(reaction.id)
        continue
    for meta, sto in reaction.metabolites.items():
        if meta.id.endswith('_e0'):
            trans_reactions.append(reaction.id)
            break

sub_trans_rxn = ['rxn05226_c0', 'rxn10344_c0','rxn05516_c0', 'rxn10481_c0', 'rxn08715_c0', 'rxn05517_c0']   # for methylocystis

for rxnid in trans_reactions:
    if not rxnid in sub_trans_rxn:
        add_reaction_from_driftModel(new_model,drift_model,rxnid)
        new_model.reactions.get_by_id(rxnid).annotation = {}
        print(rxnid+' added')


boundary_reactions = [reaction.id for reaction in drift_model.boundary]
sub_ex_rxn = ['EX_cpd00027_e0','EX_cpd00531_e0','EX_cpd04097_e0','EX_cpd01012_e0']  # for methylocystis
for rxnid in boundary_reactions:
    if not rxnid in sub_ex_rxn:
        add_reaction_from_driftModel(new_model,drift_model,rxnid)
        new_model.reactions.get_by_id(rxnid).annotation = {}
        print(rxnid+' added')

cobra.io.write_sbml_model(new_model,filename=org_name+'_model\\'+org_name+'_add_from_modelseed_5_2.xml')


cobra_reaction = cobra.Reaction('rxn13109_c0')
cobra_reaction.name = 'Transport of methane'
cobra_reaction.lower_bound = 0
cobra_reaction.upper_bound = 1000
metabolites_to_add = {}
metabolites_to_add[cobra.Metabolite('cpd01024_e0', name='Methane_e0', compartment='e0', formula='CH4', charge=0)] = -1
metabolites_to_add[new_model.metabolites.get_by_id('cpd01024_c0')] = 1
cobra_reaction.add_metabolites(metabolites_to_add)
new_model.add_reactions([cobra_reaction])

new_model.add_boundary(new_model.metabolites.get_by_id("cpd01024_e0"), type="exchange")
new_model.reactions.get_by_id('EX_cpd01024_e0').annotation={}

rxn05561_c0 added
rxn05625_c0 added
rxn08734_c0 added
rxn05319_c0 added
rxn09295_c0 added
rxn05145_c0 added
rxn10122_c0 added
rxn08173_c0 added
rxn05528_c0 added
rxn05209_c0 added
rxn09003_c0 added
rxn09001_c0 added
rxn05627_c0 added
rxn05468_c0 added
rxn05467_c0 added
rxn14427_c0 added
rxn05602_c0 added
rxn05654_c0 added
rxn05542_c0 added
rxn05545_c0 added
rxn11268_c0 added
rxn05541_c0 added
rxn08192_c0 added
rxn05161_c0 added
rxn05539_c0 added
rxn05466_c0 added
rxn05174_c0 added
rxn05585_c0 added
rxn05595_c0 added
rxn05535_c0 added
rxn05219_c0 added
rxn05150_c0 added
rxn05533_c0 added
rxn05552_c0 added
rxn05172_c0 added
rxn12850_c0 added
rxn08628_c0 added
rxn05175_c0 added
rxn12848_c0 added
rxn05163_c0 added
rxn05547_c0 added
rxn05682_c0 added
rxn05515_c0 added
rxn05148_c0 added
rxn05527_c0 added
rxn08924_c0 added
rxn05619_c0 added
rxn08762_c0 added
rxn09657_c0 added
rxn05594_c0 added
rxn05512_c0 added
rxn05312_c0 added
rxn05315_c0 added
rxn12851_c0 added
rxn05153_c0 added
rxn09193_c