# N starvation 

## Which N sources allow MED4 to grow

check out Escher visualization https://escher.readthedocs.io/en/latest/getting_started.html

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import cobra
import numpy as np
import seaborn as sns
import itertools
from matplotlib.colors import LogNorm, Normalize


In [2]:
import subprocess
import time


In [3]:
from cobra.flux_analysis import flux_variability_analysis
from cobra.flux_analysis import production_envelope


In [4]:
from cobra.flux_analysis import (
    single_gene_deletion, single_reaction_deletion, double_gene_deletion,
    double_reaction_deletion)

In [5]:
sns.hls_palette

<function seaborn.palettes.hls_palette(n_colors=6, h=0.01, l=0.6, s=0.65, as_cmap=False)>

In [6]:
# values from Ofaim at el.

PARAMETER_VALUES = [#"Name",     "Reaction ID",          "Lower bound", "UpperBound"
                   ["HCO3",      "HCO3EXcar",            -8,            0],
                   #["Nitrogen",  "AmmoniaEX",            -0.56,         0],
                   #["Phosphate", "FAKEOrthophosphateEX", -0.1,          0],
                   ["Light",     "LightEX",              -150,          0]]
CO2MAX = 0.82

# Block fake reactions
FAKE_TRANSPORT = ["AminosugarsOUT", "FAKEAAOUT", "FAKEABPOUT", "FAKEacpTRANS", "FAKEApoacpTRANS", "FAKEThioredoxinTRANS", 
                  "FreefattyacidsOUT", "7NMeth7carbOUT", "ArtificialproteinOUT", "FADOUT", "LipoylproteinTRANS", "MenaquinoneOUT", 
                  "NicotinateOUT", "THFpolyglutOUT", "Thiamin_dpOUT"]


In [7]:
def get_elements(rid):
    r = model.reactions.get_by_id(rid)
    m = r.reactants[0]    
    elements = m.elements.copy()
    
    res = {
        'reaction'   : rid,
        'metabolite' : m.name,
        'mid'        : m.id,
        'N'          : 'N' in elements,
        'C'          : 'C' in elements,
        
    }
    return pd.Series(res)

In [8]:
def alt_get_elements(rid):
    r = alt_model.reactions.get_by_id(rid)
    m = r.reactants[0]    
    elements = m.elements.copy()
    
    res = {
        'reaction'   : rid,
        'metabolite' : m.name,
        'mid'        : m.id,
        'N'          : 'N' in elements,
        'C'          : 'C' in elements,
        
    }
    return pd.Series(res)

In [100]:
def uni_get_elements(model, rid):
    r = model.reactions.get_by_id(rid)
    m = r.reactants[0]    
    elements = m.elements.copy()
    
    res = {
        'reaction'   : rid,
        'metabolite' : m.name,
        'mid'        : m.id,
        'N'          : 'N' in elements,
        'C'          : 'C' in elements,
        
    }
    return pd.Series(res)

# Import model and manipulate based on Ofaim at el

In [9]:
import os
model_dpath = os.path.join('..', 'Model_files')
model_fname = 'iSO595v7_with_formula.xml'
model_fpath = os.path.join(model_dpath, model_fname)


In [10]:
model = cobra.io.read_sbml_model(model_fpath)

Set parameter Username
Academic license - for non-commercial use only - expires 2023-07-13


In [11]:
# manipulations copied from Ofaim at el.

remove_blocked = False

# Block H2S
model.reactions.H2SEX.lower_bound = 0

# Block fake transports
for rid in FAKE_TRANSPORT:
    model.reactions.get_by_id(rid).bounds = (0,0)

# Remove blocked reactions
if remove_blocked:
    blocked = cobra.flux_analysis.find_blocked_reactions(model, open_exchanges = True)
    print('blocked', len(blocked), blocked)
    model.remove_reactions([model.reactions.get_by_id(r_id) for r_id in blocked])

# Block maximum CO2 production
model.reactions.CO2EX.bounds = (0, CO2MAX)

for i, row in enumerate(PARAMETER_VALUES):
    # Row: Name, Reaction ID, lower bound, upper bound
    key = row[0]
    reaction_id = row[1]
    lower_bound = row[2]
    upper_bound = row[3]
    r = model.reactions.get_by_id(reaction_id)
    # Fix flux
    r.bounds = (lower_bound, upper_bound)

In [12]:
exchange_ids = [r.id for r in model.exchanges] # + ["R00024"]

In [13]:
print('reactions', len(model.reactions))
print('metabolites', len(model.metabolites))
print('genes', len(model.genes))

reactions 994
metabolites 802
genes 595


In [14]:
model

0,1
Name,COBRAModel
Memory address,0x07ff365661490
Number of metabolites,802
Number of reactions,994
Number of groups,55
Objective expression,1.0*BIOMASS - 1.0*BIOMASS_reverse_69053
Compartments,"Cytoplasm, Extracellular, unknownCompartment2, Periplasm, unknownCompartment5"


In [15]:
model.metabolites.get_by_id('Carbamate[c]')

0,1
Metabolite identifier,Carbamate[c]
Name,Carbamate
Memory address,0x7ff365628f40
Formula,CH3NO2
Compartment,c
In 4 reaction(s),"R03546, R01395, R10949, R07316"


In [16]:
cobra.Configuration()

Attribute,Description,Value
solver,Mathematical optimization solver,gurobi
tolerance,"General solver tolerance (feasibility, integrality, etc.)",1e-07
lower_bound,Default reaction lower bound,-1000.0
upper_bound,Default reaction upper bound,1000.0
processes,Number of parallel processes,11
cache_directory,Path for the model cache,/home/oweissberg/.cache/cobrapy
max_cache_size,Maximum cache size in bytes,104857600
cache_expiration,Model cache expiration time in seconds (if any),


In [17]:
model.reactions.get_by_id('BIOMASS')

0,1
Reaction identifier,BIOMASS
Name,Biomass formation
Memory address,0x7ff3654336a0
Stoichiometry,104.0 ATP[c] + 0.0324116276714271 BioPool[c] + 0.0268408791654006 Bmineral[c] + 0.0559910868023904 Cell_wall[c] + 0.00111921401802897 Free_nucleic_acids[c] + 104.0 H2O[c] + 0.649245416793275...  104.0 ATP + 0.0324116276714271 BioPool + 0.0268408791654006 Trace metals and minerals + 0.0559910868023904 Cell wall Biomass precursor + 0.00111921401802897 Free nucleic acids + 104.0 H2O +...
GPR,
Lower bound,0.0
Upper bound,1000.0


# Get a map of med4 metabolites to BiGG ids

In [18]:
bigg_df =  pd.read_excel('MED4_to_Bigg.xlsx',)

In [19]:
bigg_df = bigg_df.loc[~(bigg_df.mid.isin(['D_Glucose[e]']) & bigg_df.BiggID.isna())]

# get ref info on all compounds

In [20]:
fpath = '~/work/tools//ModelSEEDDatabase/Biochemistry/compounds.tsv'
ref_df = pd.read_csv(fpath, sep='\t')

  ref_df = pd.read_csv(fpath, sep='\t')


In [21]:
kegg_compound_df = pd.read_csv('/home/oweissberg/work/CC1A3/genomes/kegg_compounds.csv', )


In [22]:
ref_df['BiggID'] = ref_df.aliases.str.extract(r'BiGG: ([^;\|\:]+)')
ref_df = ref_df.loc[ref_df.source.isin(['Primary Database'])]
ref_df['kegg_id'] = ref_df.aliases.str.extract(r'KEGG: ([^;\|\:]+)')

In [23]:
ref_df = pd.merge(ref_df, kegg_compound_df, left_on='kegg_id', right_on='compound_id', how='left')

In [24]:
ref_df

Unnamed: 0.1,id,abbreviation,name,formula,mass,source,inchikey,charge,is_core,is_obsolete,...,smiles,notes,BiggID,kegg_id,Unnamed: 0,main,sub,path,ecpath,compound_id
0,cpd00001,h2o,H2O,H2O,18.0,Primary Database,XLYOFNOQVPJJNP-UHFFFAOYSA-N,0,1,0,...,O,GC|EQ|EQU,h2o,C00001,,,,,,
1,cpd00002,atp,ATP,C10H13N5O13P3,504.0,Primary Database,ZKHQWZAMYRWXGA-KQYNXXCUSA-K,-3,1,0,...,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O...,GC|EQ|EQU,atp,C00002,255.0,Nucleic acids,Nucleotides [Fig],Ribonucleotides,C00002 ATP,C00002
2,cpd00002,atp,ATP,C10H13N5O13P3,504.0,Primary Database,ZKHQWZAMYRWXGA-KQYNXXCUSA-K,-3,1,0,...,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O...,GC|EQ|EQU,atp,C00002,390.0,Vitamins and cofactors,Cofactors,Coenzymes [Fig],C00002 ATP,C00002
3,cpd00003,nad,NAD,C21H26N7O14P2,662.0,Primary Database,BAWFJGJZGIEFAR-NNYOXOHSSA-M,-1,1,0,...,NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)([O-])OP(...,GC|EQ|EQU,nad,C00003,384.0,Vitamins and cofactors,Cofactors,Coenzymes [Fig],C00003 NAD,C00003
4,cpd00004,nadh,NADH,C21H27N7O14P2,663.0,Primary Database,BOPGDPNILDQYTO-NNYOXOHSSA-L,-2,1,0,...,NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)([O-])OP(=O)(...,GC|EQ|EQU,nadh,C00004,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30641,cpd37265,phosphoethanolamine-cellulose,phosphoethanolamine-cellulose,C28H52N2O25P2R2,10000000.0,Primary Database,,0,0,0,...,*[C@@H]1O[C@H](COP(=O)([O-])OCC[NH3+])[C@@H](O...,GC,,,,,,,,
30642,cpd37266,11Z-3-oxo-icos-11-enoyl-ACPs,11Z-3-oxo-icos-11-enoyl-ACPs,C31H54N2O9PRS,10000000.0,Primary Database,,-2,0,0,...,*[NH2+][C@@H](COP(=O)([O-])OCC(C)(C)[C@@H](O)C...,GC,,,,,,,,
30643,cpd37267,DNA-with-3-prime-pp-5-prime-A-cap,DNA-with-3-prime-pp-5-prime-A-cap,C15H19N5O15P3R2,10000000.0,Primary Database,,-3,0,0,...,*OP(=O)([O-])OC[C@H]1O[C@@H](*)C[C@@H]1OP(=O)(...,GC,,,,,,,,
30644,cpd37268,L-Threonyl-Threonyl-Carrier-Proteins,L-Threonyl-Threonyl-Carrier-Proteins,C18H34N4O10PR2S,10000000.0,Primary Database,,1,0,0,...,*[NH2+][C@@H](COP(=O)([O-])OCC(C)(C)[C@@H](O)C...,GC,,,,,,,,


In [25]:
kegg_compound_df.loc[kegg_compound_df.compound_id.isin(['C00004'])]

Unnamed: 0.1,Unnamed: 0,main,sub,path,ecpath,compound_id


# Compare exchange reactions/metabolites of the two models

In [26]:
exchange_ids = [r.id for r in model.exchanges] 
pro_med_df = pd.DataFrame({'mediumExchange' : exchange_ids})
pro_med_df = pro_med_df.mediumExchange.apply(get_elements)


In [27]:
#pro_df = pd.read_excel('PRO_N_sources.xlsx', sheet_name='secretion')
alt_dpath = '../../HOT1A3-model/N_Starvation/'
#alt_med_df = pd.read_excel(os.path.join(alt_dpath, 'HOT1A3_medium_NCBI.xlsx'))
alt_model_dpath = os.path.join('../../HOT1A3-model', 'Models')
alt_model_fname = 'HOT1A3_ncbi.xml'
alt_model_fpath = os.path.join(alt_model_dpath, alt_model_fname)


In [117]:
universal_bacteria_fpath = '/home/oweissberg/work/tools/anaconda3/lib/python3.9/site-packages/carveme/data/generated/universe_bacteria.xml.gz'
#universal_bacteria_fpath = '/home/oweissberg/work/tools/anaconda3/lib/python3.9/site-packages/carveme/data/generated/universe_gramneg.xml.gz'
#universal_bacteria_fpath = '/home/oweissberg/work/tools/anaconda3/lib/python3.9/site-packages/carveme/data/generated/universe_cyanobacteria.xml.gz'
unibac_model = cobra.io.read_sbml_model(universal_bacteria_fpath)


In [127]:
#universal_bacteria_fpath = '/home/oweissberg/work/tools/anaconda3/lib/python3.9/site-packages/carveme/data/generated/universe_bacteria.xml.gz'
#universal_bacteria_fpath = '/home/oweissberg/work/tools/anaconda3/lib/python3.9/site-packages/carveme/data/generated/universe_gramneg.xml.gz'
universal_cyano_fpath = '/home/oweissberg/work/tools/anaconda3/lib/python3.9/site-packages/carveme/data/generated/universe_cyanobacteria.xml.gz'
unicyano_model = cobra.io.read_sbml_model(universal_cyano_fpath)


No objective coefficients in model. Unclear what should be optimized


In [111]:
alt_model = cobra.io.read_sbml_model(alt_model_fpath)
alt_exchange_ids = [r.id for r in alt_model.exchanges] 
alt_med_df = pd.DataFrame({'mediumExchange' : alt_exchange_ids})
alt_med_df = alt_med_df.mediumExchange.apply(alt_get_elements)


In [118]:
uni_exchange_ids = [r.id for r in unibac_model.exchanges] 
uni_med_df = pd.DataFrame({'mediumExchange' : uni_exchange_ids})
uni_med_df = uni_med_df.mediumExchange.apply(lambda  x: uni_get_elements(unibac_model, x))


In [128]:
cyano_exchange_ids = [r.id for r in unicyano_model.exchanges] 
cyano_med_df = pd.DataFrame({'mediumExchange' : cyano_exchange_ids})
cyano_med_df = cyano_med_df.mediumExchange.apply(lambda  x: uni_get_elements(unicyano_model, x))


In [120]:
uni_med_df

Unnamed: 0,reaction,metabolite,mid,N,C
0,EX_12dgr160_e,"1,2-Diacyl-sn-glycerol (dihexadecanoyl, n-C16:0)",12dgr160_e,False,True
1,EX_12dgr180_e,"1,2-Diacyl-sn-glycerol (dioctadecanoyl, n-C18:0)",12dgr180_e,False,True
2,EX_12ppd__R_e,"(R)-Propane-1,2-diol",12ppd__R_e,False,True
3,EX_12ppd__S_e,"(S)-Propane-1,2-diol",12ppd__S_e,False,True
4,EX_13dampp_e,1 3 Diaminopropane C3H12N2,13dampp_e,True,True
...,...,...,...,...,...
640,EX_xylan4_e,"Xylan (4 backbone units, 1 glcur side chain)",xylan4_e,False,True
641,EX_xylan8_e,"Xylan (8 backbone units, 2 glcur side chain)",xylan8_e,False,True
642,EX_xylb_e,Xylobiose,xylb_e,False,True
643,EX_xylu__L_e,L-Xylulose,xylu__L_e,False,True


In [29]:
pro_med_df = pd.merge(pro_med_df, bigg_df[['mid', 'BiggID']].drop_duplicates(), on='mid', how='left')

In [30]:
pro_med_df.loc[pro_med_df.mid.isin(['Copper[e]']), 'BiggID'] = 'cu2'

In [31]:
pro_med_df.loc[pro_med_df.mid.isin(['Ammonia[e]']), 'BiggID'] = 'nh4'

In [32]:
pro_med_df.head()

Unnamed: 0,reaction,metabolite,mid,N,C,BiggID
0,AminosugarsEX,Amino sugars pool,Amino_sugars[e],False,False,
1,FAKEAAEX,amino_acid_pool_e,amino_acid_pool[e],False,False,
2,FAKEABPEX,Adenosine 3_5-bisphosphate,Adenosine_3_5_bisphosphate[e],True,True,pap
3,FAKEacpEX,Acyl-carrier protein,Acyl_carrier_protein[e],False,False,ACP
4,FAKEApoacpEX,Apo-(acyl-carrier-protein),Apo_acyl_carrier_protein[e],False,False,


In [33]:
alt_med_df.head()

Unnamed: 0,reaction,metabolite,mid,N,C
0,EX_14glucan_e,"1,4-alpha-D-glucan",14glucan_e,False,True
1,EX_25dkglcn_e,"2,5-diketo-D-gluconate",25dkglcn_e,False,True
2,EX_2ameph_e,2-Aminoethylphosphonate,2ameph_e,True,True
3,EX_2m35mdntha_e,N 2 methyl 3 5 dinitrophenyl 4 methyl 3 5 di...,2m35mdntha_e,True,False
4,EX_2pglyc_e,2-Phosphoglycolate,2pglyc_e,False,True


In [34]:
pro_med_df.shape

(101, 6)

In [35]:
alt_med_df.shape

(257, 5)

In [36]:
bigg_df.loc[bigg_df.BiggID.isin(['2pglyc'])].T

Unnamed: 0,389
Unnamed: 0,389
mid,2_Phosphoglycolate[c]
name,2-Phosphoglycolate
formula,C2H5O6P
charge,0
compartment_x,[c]
cleaned_name,2_phosphoglycolate
cleaned_mid,2_phosphoglycolate
id,cpd00727
BiggID,2pglyc


In [37]:
alt_med_df.columns, pro_med_df.columns

(Index(['reaction', 'metabolite', 'mid', 'N', 'C'], dtype='object'),
 Index(['reaction', 'metabolite', 'mid', 'N', 'C', 'BiggID'], dtype='object'))

In [38]:
alt_med_df['BiggID'] = alt_med_df.mid.str.rpartition('_',)[0]

In [124]:
uni_med_df['BiggID'] = uni_med_df.mid.str.rpartition('_',)[0]

In [130]:
cyano_med_df['BiggID'] = cyano_med_df.mid.str.rpartition('_',)[0]

In [39]:
exchange_df = pd.merge(pro_med_df, alt_med_df, left_on='BiggID', right_on='BiggID', how='outer', suffixes=['_pro', '_alt'])

In [40]:
exchange_df.BiggID.isna().value_counts()

False    291
True      10
Name: BiggID, dtype: int64

In [41]:
exchange_df.loc[exchange_df.BiggID.str.contains('ala__L', case=False, na=False)]

Unnamed: 0,reaction_pro,metabolite_pro,mid_pro,N_pro,C_pro,BiggID,reaction_alt,metabolite_alt,mid_alt,N_alt,C_alt
54,L_AlanineEX,L-Alanine,L_Alanine[e],True,True,ala__L,EX_ala__L_e,L-Alanine,ala__L_e,True,True
238,,,,,,met_L_ala__L,EX_met_L_ala__L_e,Met L ala L C8H16N2O3S,met_L_ala__L_e,True,True


In [42]:
exchange_df['map_status'] = 'mapped'
exchange_df.loc[exchange_df.metabolite_alt.isna(), 'map_status'] = 'PRO only'
exchange_df.loc[exchange_df.metabolite_pro.isna(), 'map_status'] = 'ALT only'
exchange_df.map_status.value_counts()

ALT only    200
mapped       57
PRO only     44
Name: map_status, dtype: int64

In [43]:
exchange_df.isna().sum()

reaction_pro      200
metabolite_pro    200
mid_pro           200
N_pro             200
C_pro             200
BiggID             10
reaction_alt       44
metabolite_alt     44
mid_alt            44
N_alt              44
C_alt              44
map_status          0
dtype: int64

# Look for potential cross feeding

In [44]:
pro_df = pd.read_excel('data/PRO_N_sources.xlsx', sheet_name='secretion')
alt_dpath = '../../HOT1A3-model/N_Starvation/'
alt_df = pd.read_excel(os.path.join(alt_dpath, 'ALT_N_sources.xlsx'), sheet_name='secretion')
alt2_df = pd.read_excel(os.path.join(alt_dpath, 'ALT_N_sources.xlsx'), sheet_name='secretion2')


In [45]:
alt_df.head()

Unnamed: 0.1,Unnamed: 0,secretion,minimum,maximum,uptake,metabolite_uptake,mid_uptake,N_uptake,C_uptake,metabolite_secretion,mid_secretion,N_secretion,C_secretion
0,0,sink_2ohph_c,0.0,66.658441,EX_cytd_e,Cytidine,cytd_e,True,True,2-Octaprenyl-6-hydroxyphenol,2ohph_c,False,True
1,1,sink_4crsol_c,0.0,279.476874,EX_cytd_e,Cytidine,cytd_e,True,True,P-Cresol,4crsol_c,False,True
2,3,sink_hemeO_c,0.0,65.829612,EX_cytd_e,Cytidine,cytd_e,True,True,Heme O C49H56FeN4O5,hemeO_c,True,True
3,4,sink_mobd_c,0.0,1000.0,EX_cytd_e,Cytidine,cytd_e,True,True,Molybdate,mobd_c,False,False
4,5,sink_sheme_c,0.0,95.624557,EX_cytd_e,Cytidine,cytd_e,True,True,Siroheme C42H36FeN4O16,sheme_c,True,True


In [46]:
alt2_df.loc[alt2_df.N_uptake, 'metabolite_uptake']

253      Ammonium
254      Ammonium
255      Ammonium
256      Ammonium
257      Ammonium
           ...   
20191     Cyanate
20192     Cyanate
20193     Cyanate
20194     Cyanate
20195     Cyanate
Name: metabolite_uptake, Length: 5880, dtype: object

In [47]:
alt2_df.head()

Unnamed: 0.1,Unnamed: 0,secretion,minimum,maximum,uptake,uptake2,metabolite_uptake,mid_uptake,N_uptake,C_uptake,metabolite_uptake2,mid_uptake2,N_uptake2,C_uptake2,metabolite_secretion,mid_secretion,N_secretion,C_secretion
0,0,sink_2ohph_c,0.0,68.865821,EX_14glucan_e,EX_nh4_e,"1,4-alpha-D-glucan",14glucan_e,False,True,Ammonium,nh4_e,True,False,2-Octaprenyl-6-hydroxyphenol,2ohph_c,False,True
1,1,sink_4crsol_c,0.0,308.174662,EX_14glucan_e,EX_nh4_e,"1,4-alpha-D-glucan",14glucan_e,False,True,Ammonium,nh4_e,True,False,P-Cresol,4crsol_c,False,True
2,3,sink_hemeO_c,0.0,59.26159,EX_14glucan_e,EX_nh4_e,"1,4-alpha-D-glucan",14glucan_e,False,True,Ammonium,nh4_e,True,False,Heme O C49H56FeN4O5,hemeO_c,True,True
3,4,sink_mobd_c,0.0,1000.0,EX_14glucan_e,EX_nh4_e,"1,4-alpha-D-glucan",14glucan_e,False,True,Ammonium,nh4_e,True,False,Molybdate,mobd_c,False,False
4,5,sink_sheme_c,0.0,77.873881,EX_14glucan_e,EX_nh4_e,"1,4-alpha-D-glucan",14glucan_e,False,True,Ammonium,nh4_e,True,False,Siroheme C42H36FeN4O16,sheme_c,True,True


In [48]:
pro_df.head()

Unnamed: 0.1,Unnamed: 0,secretion,minimum,maximum,uptake,metabolite_uptake,mid_uptake,N_uptake,C_uptake,metabolite_secretion,mid_secretion,N_secretion,C_secretion
0,0,B12EX,0.0,7.94191,GuanosineEX,Guanosine[e],Guanosine[e],True,True,Cobamide coenzyme,Cobamide_coenzyme[e],True,True
1,1,BiotinEX,0.0,1.985477,GuanosineEX,Guanosine[e],Guanosine[e],True,True,Biotin,Biotin[e],True,True
2,2,MethanolEX,4.2e-05,1.985519,GuanosineEX,Guanosine[e],Guanosine[e],True,True,Methanol,Methanol[e],False,True
3,3,SAdenosyl4methylthio2oxobutanoateEX,4.2e-05,1.985519,GuanosineEX,Guanosine[e],Guanosine[e],True,True,S-Adenosyl-4-methylthio-2-oxobutanoate,S_Adenosyl_4_methylthio_2_oxobutanoate[e],True,True
4,4,SMethyl5thioDriboseEX,0.0,15.884229,GuanosineEX,Guanosine[e],Guanosine[e],True,True,S-Methyl-5-thio-D-ribose,S_Methyl_5_thio_D_ribose[e],False,True


In [49]:
exchange_df.head()

Unnamed: 0,reaction_pro,metabolite_pro,mid_pro,N_pro,C_pro,BiggID,reaction_alt,metabolite_alt,mid_alt,N_alt,C_alt,map_status
0,AminosugarsEX,Amino sugars pool,Amino_sugars[e],False,False,,,,,,,PRO only
1,FAKEAAEX,amino_acid_pool_e,amino_acid_pool[e],False,False,,,,,,,PRO only
2,FAKEApoacpEX,Apo-(acyl-carrier-protein),Apo_acyl_carrier_protein[e],False,False,,,,,,,PRO only
3,FreefattyacidsEX,Free fatty acids,Free_fatty_acids[e],False,False,,,,,,,PRO only
4,ArtificialproteinEX,Artificial Protein biomass precursor,Artificial_protein[e],False,False,,,,,,,PRO only


In [50]:
for i in ['PRO_UPTAKE', 'PRO_SECRETION', 'ALT_UPTAKE', 'ALT_SECRETION']:
    exchange_df[i] = False

exchange_df.loc[exchange_df.mid_pro.isin(pro_df.mid_uptake), 'PRO_UPTAKE'] = True
exchange_df.loc[exchange_df.mid_pro.isin(pro_df.mid_secretion), 'PRO_SECRETION'] = True



In [51]:
exchange_df.loc[exchange_df.mid_alt.isin(alt_df.mid_uptake), 'ALT_UPTAKE'] = True
exchange_df.loc[exchange_df.mid_alt.isin(alt_df.mid_secretion), 'ALT_SECRETION'] = True
exchange_df.loc[exchange_df.mid_alt.isin(alt2_df.mid_uptake), 'ALT_UPTAKE'] = True
exchange_df.loc[exchange_df.mid_alt.isin(alt2_df.mid_uptake2), 'ALT_UPTAKE'] = True
exchange_df.loc[exchange_df.mid_alt.isin(alt2_df.mid_secretion), 'ALT_SECRETION'] = True



In [52]:
exchange_df.groupby(['PRO_UPTAKE', 'PRO_SECRETION', 'ALT_UPTAKE', 'ALT_SECRETION']).size().reset_index()

Unnamed: 0,PRO_UPTAKE,PRO_SECRETION,ALT_UPTAKE,ALT_SECRETION,0
0,False,False,False,False,99
1,False,False,False,True,23
2,False,False,True,False,110
3,False,False,True,True,12
4,False,True,False,False,21
5,False,True,False,True,2
6,False,True,True,False,13
7,False,True,True,True,5
8,True,False,True,False,1
9,True,True,False,False,3


In [53]:
exchange_df[['PRO_UPTAKE', 'PRO_SECRETION', 'ALT_UPTAKE', 'ALT_SECRETION']].sum()


PRO_UPTAKE        16
PRO_SECRETION     56
ALT_UPTAKE       152
ALT_SECRETION     48
dtype: int64

In [54]:
ref_df.columns

Index(['id', 'abbreviation', 'name', 'formula', 'mass', 'source', 'inchikey',
       'charge', 'is_core', 'is_obsolete', 'linked_compound', 'is_cofactor',
       'deltag', 'deltagerr', 'pka', 'pkb', 'abstract_compound',
       'comprised_of', 'aliases', 'smiles', 'notes', 'BiggID', 'kegg_id',
       'Unnamed: 0', 'main', 'sub', 'path', 'ecpath', 'compound_id'],
      dtype='object')

In [55]:
exchange_df = pd.merge(exchange_df, 
         ref_df[['BiggID', 'name', 'kegg_id', 'main', 'formula', 'aliases']].dropna(subset=['BiggID', 'kegg_id'], how='any'
                                                             ).drop_duplicates(subset=['BiggID']), 
         on='BiggID', how='left')

In [56]:
exchange_df.loc[exchange_df.name.isna() & ~exchange_df.metabolite_alt.isna(), 'name'] = exchange_df.loc[exchange_df.name.isna() & ~exchange_df.metabolite_alt.isna(), 'metabolite_alt']
exchange_df.loc[exchange_df.name.isna() & ~exchange_df.metabolite_pro.isna(), 'name'] = exchange_df.loc[exchange_df.name.isna() & ~exchange_df.metabolite_pro.isna(), 'metabolite_pro']


In [57]:
exchange_df['N'] = exchange_df.N_alt
exchange_df.loc[exchange_df.N.isna(), 'N'] = exchange_df.loc[exchange_df.N.isna(), 'N_pro'] 
exchange_df['C'] = exchange_df.C_alt
exchange_df.loc[exchange_df.C.isna(), 'C'] = exchange_df.loc[exchange_df.C.isna(), 'C_pro'] 


In [58]:
exchange_df.loc[(~exchange_df.C) & (~exchange_df.N), 'N/C'] = 'none'
exchange_df.loc[exchange_df.C & exchange_df.N, 'N/C'] = 'N+C'
exchange_df.loc[exchange_df.C & (~exchange_df.N), 'N/C'] = 'C'
exchange_df.loc[(~exchange_df.C) & exchange_df.N, 'N/C'] = 'N'


In [82]:
exchange_df.loc[exchange_df.main.isna(), 'main'] = ''

In [83]:
exchange_df.groupby(['N', 'C', 'N/C']).size().reset_index()

Unnamed: 0,N,C,N/C,0
0,False,False,none,41
1,False,True,C,116
2,True,False,N,6
3,True,True,N+C,138


In [84]:
exchange_df.columns

Index(['reaction_pro', 'metabolite_pro', 'mid_pro', 'N_pro', 'C_pro', 'BiggID',
       'reaction_alt', 'metabolite_alt', 'mid_alt', 'N_alt', 'C_alt',
       'map_status', 'PRO_UPTAKE', 'PRO_SECRETION', 'ALT_UPTAKE',
       'ALT_SECRETION', 'name', 'kegg_id', 'main', 'formula', 'aliases', 'N',
       'C', 'N/C', 'PRO->ALT', 'ALT->PRO', 'PRO->ALT x', 'ALT->PRO x'],
      dtype='object')

In [85]:
p2a_series = pd.Series(list(zip(exchange_df.map_status,  exchange_df.PRO_SECRETION,exchange_df.ALT_UPTAKE,)))

p2a_map = {
    # map_status, PRO_SECRETION, ALT_UPTAKE, 
    ('PRO only', False, False) : '|--- --X|', 
    ('PRO only', True,  False) : '|--> --X|', 
    ('mapped',   True,  False) : '|--> ---|', 
    ('mapped',   True,  True ) : '|--> -->|', 
    ('mapped',   False, False) : '|--- ---|', 
    ('mapped',   False, True ) : '|--- -->|', 
    ('ALT only', False, True ) : '|X-- -->|', 
    ('ALT only', False, False) : '|X-- ---|', 
}



In [86]:
a2p_series = pd.Series(list(zip(exchange_df.map_status,  exchange_df.PRO_UPTAKE,exchange_df.ALT_SECRETION,)))

a2p_map = {
    # map_status,  PRO_UPTAKE,ALT_SECRETION
    ('mapped',   False, False) : '|--- ---|', 
    ('PRO only', False, False) : '|--- --X|', 
    ('ALT only', False, False) : '|X-- ---|', 
    ('PRO only', True , False) : '|<-- --X|', 
    ('mapped',   False, True ) : '|--- <--|', 
    ('mapped',   True , False) : '|<-- ---|', 
    ('mapped',   True , True ) : '|<-- <--|', 
    ('ALT only', False, True ) : '|X-- <--|', 
}
a2p_series.unique()

array([('PRO only', False, False), ('mapped', False, False),
       ('mapped', False, True), ('mapped', True, True),
       ('mapped', True, False), ('PRO only', True, False),
       ('ALT only', False, False), ('ALT only', False, True)],
      dtype=object)

In [87]:
exchange_df['PRO->ALT x'] = p2a_series.map(p2a_map)
exchange_df['ALT->PRO x'] = a2p_series.map(a2p_map)

In [88]:
exchange_df['PRO->ALT'] = exchange_df['PRO->ALT x'].str.replace('X', '-', regex=False)
exchange_df['ALT->PRO'] = exchange_df['ALT->PRO x'].str.replace('X', '-', regex=False)

In [89]:
exchange_df.groupby(['main', 'N/C', 'PRO->ALT', 'ALT->PRO'], dropna=False).size().reset_index()

Unnamed: 0,main,N/C,PRO->ALT,ALT->PRO,0
0,,C,|--- ---|,|--- ---|,21
1,,C,|--- ---|,|--- <--|,8
2,,C,|--- -->|,|--- ---|,33
3,,C,|--- -->|,|--- <--|,2
4,,C,|--> ---|,|--- ---|,3
...,...,...,...,...,...
56,Vitamins and cofactors,C,|--- -->|,|--- ---|,1
57,Vitamins and cofactors,N+C,|--- ---|,|--- ---|,4
58,Vitamins and cofactors,N+C,|--- ---|,|--- <--|,1
59,Vitamins and cofactors,N+C,|--> ---|,|--- ---|,3


In [139]:
exchange_df.loc[exchange_df.BiggID.isin(uni_med_df.BiggID) & exchange_df.mid_alt.isna(), 'map_status'] = 'PRO and Universal not ALT'

In [140]:
exchange_df.loc[exchange_df.BiggID.isin(cyano_med_df.BiggID) & exchange_df.mid_pro.isna(), 'map_status'] = 'ALT and Universal Cyano not PRO'

In [142]:
exchange_df.map_status.value_counts()

ALT and Universal Cyano not PRO    199
mapped                              57
PRO only                            28
PRO and Universal not ALT           16
ALT only                             1
Name: map_status, dtype: int64

In [151]:
exchange_df.loc[exchange_df.main.isin(['']), 'main'] = 'other'

In [152]:
exchange_df.main.unique()

array(['other', 'Vitamins and cofactors', 'Peptides', 'Carbohydrates',
       'Organic acids', 'Nucleic acids', 'Lipids'], dtype=object)

In [153]:
mini_exchange_df = exchange_df[['main', 'name', 'N/C', 'PRO->ALT x', 'ALT->PRO x', 'formula', 'aliases',
             'map_status', 'PRO_UPTAKE', 'PRO_SECRETION', 'ALT_UPTAKE', 'ALT_SECRETION',
             'BiggID','kegg_id',
             'reaction_pro', 'metabolite_pro', 'mid_pro', 'N_pro', 'C_pro', 
             'reaction_alt', 'metabolite_alt', 'mid_alt', 'N_alt', 'C_alt',
            ]]
             

In [155]:
mini_exchange_df.columns

Index(['main', 'name', 'N/C', 'PRO->ALT x', 'ALT->PRO x', 'formula', 'aliases',
       'map_status', 'PRO_UPTAKE', 'PRO_SECRETION', 'ALT_UPTAKE',
       'ALT_SECRETION', 'BiggID', 'kegg_id', 'reaction_pro', 'metabolite_pro',
       'mid_pro', 'N_pro', 'C_pro', 'reaction_alt', 'metabolite_alt',
       'mid_alt', 'N_alt', 'C_alt'],
      dtype='object')

In [156]:
p2a_df = exchange_df.pivot_table(
    index=['main', 'N/C', 'map_status'], columns=['PRO->ALT'], values='name', #dropna=False,
    aggfunc=lambda x: ', '.join(x)
)

a2p_df = exchange_df.pivot_table(
    index=['main', 'N/C', 'map_status'], columns=['ALT->PRO'], values='name',  #dropna=False,
    aggfunc=lambda x: ', '.join(x)
)

In [157]:
with pd.ExcelWriter('CC1A3_exchanges.xlsx') as writer:  
    p2a_df.to_excel(writer, sheet_name='P->A')
    a2p_df.to_excel(writer, sheet_name='A->P')
    mini_exchange_df.to_excel(writer, sheet_name='All')


In [67]:
alt_df.N_uptake.value_counts()

True    2501
Name: N_uptake, dtype: int64

In [68]:
alt_uptake = (set(alt_df.metabolite_uptake.unique()) | set(alt2_df.metabolite_uptake.unique()) | 
              set(alt2_df.metabolite_uptake2.unique()))
alt1_uptake = set(alt_df.metabolite_uptake.unique()) 
alt2_uptake = set(alt2_df.metabolite_uptake.unique()) | set(alt2_df.metabolite_uptake2.unique())
pro_uptake = set(pro_df.name_ref.unique())
alt_secretion = set(alt_df.metabolite_secretion.unique()) | set(alt2_df.metabolite_secretion.unique())
pro_secretion = set(pro_df.name_ref_secretion.unique())


AttributeError: 'DataFrame' object has no attribute 'name_ref'

In [None]:
pro_secretion_N = set(pro_df.loc[pro_df.N_secretion].name_ref_secretion.unique())
pro_secretion_C = set(pro_df.loc[pro_df.C_secretion].name_ref_secretion.unique())


In [None]:
alt_no_uptake_all = set(exchange_df['metabolite_alt'].dropna().unique()) - alt_uptake
alt_no_uptake_N_all = set(exchange_df.dropna(subset='N_alt').loc[exchange_df.N_alt == True, 'metabolite_alt'].dropna().unique()) - alt_uptake
alt_no_uptake_C_all = set(exchange_df.dropna(subset='C_alt').loc[exchange_df.C_alt == True, 'metabolite_alt'].dropna().unique()) - alt_uptake
alt_no_uptake_N = alt_no_uptake_N_all - alt_no_uptake_C_all
alt_no_uptake_C = alt_no_uptake_C_all - alt_no_uptake_N_all
alt_no_uptake_N_C = alt_no_uptake_C_all & alt_no_uptake_N_all
alt_no_uptake_none = alt_no_uptake_all - (alt_no_uptake_C_all | alt_no_uptake_N_all)

In [None]:
exchange_df.C_alt

In [None]:
alt2_N_metabolites = (
    set(alt2_df.loc[alt2_df.N_uptake, 'metabolite_uptake'].unique()) | 
    set(alt2_df.loc[alt2_df.N_uptake2, 'metabolite_uptake2'].unique()) 
)
alt2_C_metabolites = (
    set(alt2_df.loc[alt2_df.C_uptake, 'metabolite_uptake'].unique()) | 
    set(alt2_df.loc[alt2_df.C_uptake2, 'metabolite_uptake2'].unique()) 
)

alt_N_secretion = set(alt_df.loc[alt_df.N_secretion, 'metabolite_secretion']) | set(alt2_df.loc[alt2_df.N_secretion, 'metabolite_secretion'] )

In [None]:
print(
    '\nalt2_N_metabolites', len(alt2_N_metabolites), 
    '\nalt2_C_metabolites', len(alt2_C_metabolites), 
    '\nalt2_C+N_metabolites', len(alt2_C_metabolites & alt2_N_metabolites), 
    '\nalt2_none_metabolites', len(alt2_uptake - (alt2_C_metabolites | alt2_N_metabolites)), 
    '\nalt_N_secretion_metabolites', len(alt_N_secretion),
    '\nalt_no_uptake_N', len(alt_no_uptake_N) ,
    '\nalt_no_uptake_C', len(alt_no_uptake_C) ,
    '\nalt_no_uptake_N+C', len(alt_no_uptake_N_C),
    '\nalt_no_uptake_none', len(alt_no_uptake_none),
)

In [None]:
', '.join(sorted(alt2_N_metabolites - alt2_C_metabolites))

In [None]:
', '.join(sorted(alt2_C_metabolites - alt2_N_metabolites))

In [None]:
', '.join(sorted(alt_uptake | (alt2_C_metabolites & alt2_N_metabolites)))

In [None]:
', '.join(sorted(alt2_uptake - (alt2_C_metabolites | alt2_N_metabolites)))

In [None]:
', '.join(sorted(alt_N_secretion))

In [None]:
', '.join(sorted(alt_no_uptake_N))

In [None]:
', '.join(sorted(alt_no_uptake_C))


In [None]:
', '.join(sorted(alt_no_uptake_N_C))


In [None]:
', '.join(sorted(alt_no_uptake_none))


In [None]:
print(
    '\nalt->pro', len(pro_uptake & alt_secretion), 
    '\nalt_secretion', len(alt_secretion), 
    '\nalt_secretion', len(alt_N_secretion), 
    '\npro_uptake', len(pro_uptake), 
    '\npro->alt', len(alt_uptake & pro_secretion), 
    '\npro_secretion', len(pro_secretion),
    '\nalt_uptake', len(alt_uptake), 
    '\nalt1_uptake', len(alt1_uptake), 
    '\nalt2_uptake', len(alt2_uptake), 
)

In [None]:
', '.join(sorted(alt_uptake & pro_secretion))

In [None]:
', '.join(sorted(alt1_uptake))

In [None]:
', '.join(sorted(alt2_uptake))

In [None]:
', '.join(sorted(pro_secretion))

In [None]:
', '.join(sorted(pro_secretion_C -pro_secretion_N ))

In [None]:
', '.join(sorted(pro_secretion_N -pro_secretion_C ))

In [None]:
', '.join(sorted(pro_secretion_C))

In [None]:
', '.join(sorted(pro_secretion_C & pro_secretion_N))

In [None]:
', '.join(sorted(pro_secretion - (pro_secretion_C | pro_secretion_N)))

In [None]:
', '.join(sorted(pro_uptake & alt_secretion))

In [None]:
', '.join(sorted(pro_uptake))

In [None]:
', '.join(sorted(alt_secretion))

In [None]:
alt_uptake | pro_uptake | alt_secretion | pro_secretion

In [None]:
pro_df.loc[pro_df.isna().sum(axis=1) > 0]

In [None]:
alt_df.loc[alt_df.isna().sum(axis=1) > 0]

In [None]:
set(alt_df.loc[alt_df.N_secretion, 'metabolite_secretion']) | set(alt2_df.loc[alt2_df.N_secretion, 'metabolite_secretion'] )

In [None]:
pro_df.loc[pro_df.name_ref_secretion == 'GSH']

In [158]:
alt_model.summary()

Metabolite,Reaction,Flux,C-Number,C-Flux
14glucan_e,EX_14glucan_e,95.73,36,12.74%
abg4_e,EX_abg4_e,0.02912,12,0.00%
acald_e,EX_acald_e,1000.0,2,7.40%
adn_e,EX_adn_e,7.754,10,0.29%
ala_L_thr__L_e,EX_ala_L_thr__L_e,11.04,7,0.29%
arg__L_e,EX_arg__L_e,12.88,6,0.29%
bz_e,EX_bz_e,0.004353,7,0.00%
ca2_e,EX_ca2_e,0.2266,0,0.00%
cellb_e,EX_cellb_e,59.18,12,2.63%
cl_e,EX_cl_e,0.2266,0,0.00%

Metabolite,Reaction,Flux,C-Number,C-Flux
co2_e,EX_co2_e,-1000.0,1,3.97%
for_e,EX_for_e,-0.08737,1,0.00%
glc__D_e,EX_glc__D_e,-118.4,6,2.82%
glyc3p_e,EX_glyc3p_e,-11.92,3,0.14%
h2o_e,EX_h2o_e,-1000.0,0,0.00%
pi_e,EX_pi_e,-238.5,0,0.00%
udcpp_e,EX_udcpp_e,-281.4,55,61.44%
ura_e,EX_ura_e,-764.2,4,12.14%
val__L_e,EX_val__L_e,-981.6,5,19.49%


In [165]:
unibac_model.reactions.get_by_id('Growth')

0,1
Reaction identifier,Growth
Name,Biomass reaction
Memory address,0x7ff361841820
Stoichiometry,0.000223 10fthf_c + 0.513689 ala__L_c + 0.000223 amet_c + 0.295792 arg__L_c + 0.241055 asn__L_c + 0.241055 asp__L_c + 54.124831 atp_c + 0.005205 ca2_c + 0.005205 cl_c + 0.000576 coa_c + 0.0001...  0.000223 10-Formyltetrahydrofolate + 0.513689 L-Alanine + 0.000223 S-Adenosyl-L-methionine + 0.295792 L-Arginine + 0.241055 L-Asparagine + 0.241055 L-Aspartate + 54.124831 ATP C10H12N5O13P3 +...
GPR,
Lower bound,0.0
Upper bound,1000.0


In [160]:
r = alt_model.reactions.get_by_id('Growth')

In [164]:
print('\n'.join([m.name for m in r.metabolites]))

10-Formyltetrahydrofolate
L-Alanine
S-Adenosyl-L-methionine
L-Arginine
L-Asparagine
L-Aspartate
ATP C10H12N5O13P3
Calcium
Chloride
Coenzyme A
Co2+
CTP C9H12N3O14P3
Copper
L-Cysteine
DATP C10H12N5O12P3
DCTP C9H12N3O13P3
DGTP C10H12N5O13P3
DTTP C10H13N2O14P3
Flavin adenine dinucleotide oxidized
Fe2+ mitochondria
Iron (Fe3+)
L-Glutamine
L-Glutamate
Glycine
GTP C10H12N5O14P3
H2O H2O
L-Histidine
L-Isoleucine
Potassium
L-Leucine
L-Lysine
L-Methionine
Magnesium
5,10-Methylenetetrahydrofolate
Manganese
Menaquinol 8
Nicotinamide adenine dinucleotide
Nicotinamide adenine dinucleotide phosphate
L-Phenylalanine
L-Proline
Pyridoxal 5'-phosphate
Riboflavin C17H20N4O6
L-Serine
Sulfate
5,6,7,8-Tetrahydrofolate
Thiamine diphosphate
L-Threonine
L-Tryptophan
L-Tyrosine
Undecaprenyl-diphospho-N-acetylmuramoyl-(N-acetylglucosamine)-L-ala-D-glu-meso-2,6-diaminopimeloyl-D-ala-D-ala
UTP C9H11N2O15P3
L-Valine
Zinc
ADP C10H12N5O10P2
H+
Phosphate
Diphosphate


In [174]:
r = alt_model.reactions.get_by_id('Growth')
pd.DataFrame.from_records([{
    'Metabolite' : m.name,
    'Coefficient' : r.metabolites[m]
}    for m in r.metabolites if r.metabolites[m] <= 0]).to_excel('Alteromonas_biomass_function.xlsx')

In [177]:
len(unibac_model.reactions.get_by_id('Growth').metabolites)

57