### Setup aus MetEngSim

In [1]:
import sys # loading commands to control/navigate within the system architecture
from os.path import join
from typing import Optional
# Loading pandas, a library for data manipulation
# import xlrd
import pandas as pd
# import lxml

# Loading numpy, a library for the manipulation of numbers
import numpy as np

# loading matplotlib, a library for visualization
import matplotlib.pyplot as plt
%matplotlib inline

# loading cobrapy, a library dedicated to the analysis of genome scale metabolic models
# from cobra.io import read_sbml_model, write_sbml_model, load_matlab_model

# loading escher for metabolic network visualization
# import escher
# from escher import Builder
# from time import sleep
# escher.rc['never_ask_before_quit'] = True
# list of available maps
# print(escher.list_available_maps())

# loading Memote, quality assessment of GSMM
# from memote import test_model, snapshot_report

from biolabsim import Host, Strain, Ecol
from biolabsim import measure_EnzymeLevel1, Help_GenomeGenerator
from Bio.Seq import Seq
from copy import deepcopy

print('System ready')

System ready


### Change Setting
#### Add transcription factors

In [2]:
def add_TranscriptionFactor(df, TFname1:str, TFname2:str):
    '''
    Return a gene table extended by two transcription-factor (TF) rows.

    Two rows are appended (one per regulator name) and two columns are added:
      TF_regulated : 0, or the RctID of the regulator assigned to that row
      RegType      : 0, or 'Hill_Activator' / 'Hill_Repressor' on the regulator rows

    TFname1 is marked as activator and assigned to 'PFK' and 'PGI';
    TFname2 is marked as repressor and assigned to 'PGL'.
    The input frame is not modified; columns named TF_regulated / RegType
    that already exist are reset.

    Args:
        df: gene table with at least the columns RctID, Expression, Promoter,
            ORF, Fluxes and Expr2Flux.
        TFname1: RctID used for the activator row.
        TFname2: RctID used for the repressor row.

    Returns:
        A new DataFrame with a fresh 0..n-1 index.
    '''
    # All regulator values below are placeholders.
    # TODO: Expression via Help_PromoterStrength()? Promoter/ORF real, random or
    # generated? Fluxes via Help_FluxCalculator()? Expr2Flux via Help_Expr2Flux()?
    tf_rows = pd.DataFrame({
        'RctID': [TFname1, TFname2],
        'Expression': [2, 3],
        'Promoter': ['GCCCATTGACCATACCGGAAGGAGATAATAAAGTTGCACG',
                     'GCCCATTGACCATGGCGGAAGGAGATAATAAAGTTGCACG'],
        'ORF': ['ATGGAGATGAAGTAA', 'ATGGAGATGAAGTCC'],
        'Fluxes': [9, 6],
        'Expr2Flux': [4.5, 2],
    })
    # DataFrame.append was removed in pandas 2.0; concat is the supported equivalent.
    newdf = pd.concat([df, tf_rows], ignore_index=True)

    # The two new columns hold the integer 0 next to strings, so they are created
    # with object dtype instead of being up-cast from int when a string is written.
    # TODO: decide whether regulator assignment becomes random or stays fixed.
    newdf['TF_regulated'] = pd.Series(0, index=newdf.index, dtype=object)
    newdf['RegType'] = pd.Series(0, index=newdf.index, dtype=object)

    # Regulation type is stored on the regulator rows themselves.
    # Future: detect the available substrates and choose activator/repressor from that.
    newdf.loc[newdf['RctID'] == TFname1, 'RegType'] = 'Hill_Activator'
    newdf.loc[newdf['RctID'] == TFname2, 'RegType'] = 'Hill_Repressor'

    # Assignment of regulated enzymes to their regulator.
    newdf.loc[newdf['RctID'] == 'PFK', 'TF_regulated'] = TFname1
    newdf.loc[newdf['RctID'] == 'PGI', 'TF_regulated'] = TFname1
    newdf.loc[newdf['RctID'] == 'PGL', 'TF_regulated'] = TFname2
    return newdf

#### Change Wildtype

In [3]:
# Create the wild-type host and report its growth rate (strain.objective).
wtHost = Ecol()
print('Wild type growth rate: {:.2f}'.format(wtHost.strain.objective))

# adding regulator TF to dataframe and genome of wt
# The extended table (two TF rows plus the TF_regulated/RegType columns) is
# stored back on the strain.
WTdf = wtHost.strain.genes_df
WTdf = add_TranscriptionFactor(WTdf, 'TF1', 'TF2')
wtHost.strain.genes_df = WTdf
# Generating new Genome
# NOTE(review): the meaning of the arguments 500 and .6 is defined by
# Help_GenomeGenerator and is not visible in this notebook - confirm there.
WTGenome_new = Help_GenomeGenerator(WTdf, 500, .6)
wtHost.strain.genome = Seq(WTGenome_new)
# Last expression: display the extended wild-type gene table.
wtHost.strain.genes_df



Wild type growth rate: 0.87


Unnamed: 0,RctID,Expression,Promoter,ORF,Fluxes,Expr2Flux,TF_regulated,RegType
0,PFK,1.880,GCCCATTGAAAGCCGAGATTAGGGTGTGTAGCCTTGCACG,ATGCTGGAACCGTGA,7.477382,3.977331,TF1,0
1,PFL,4.883,GCCCATTGACACACTGCGTATCGCTACTTACCATTGCACG,ATGTGGCGCACTTAA,0.000000,0.000000,0,0
2,PGI,1.541,GCCCATTGATCGCTCCGCGATGAGCGTTTAACTTTGCACG,ATGCCGGTGGAATAA,4.860861,3.154355,TF1,0
3,PGK,1.446,GCCCATTGAAATTGCCTGCGGGTTGGCATAGGCTTGCACG,ATGGACGGGGTTTAA,-16.023526,-11.081277,0,0
4,PGL,1.044,GCCCATTGAAGAGTACATATCGCTTGTGTATGGTTGCACG,ATGGACCATGTGTAA,4.959985,4.750943,TF2,0
...,...,...,...,...,...,...,...,...
92,NH4t,3.151,GCCCATTGACAGAGAGTTCCAAAGAGCATAATGTTGCACG,ATGCTGTATTTTTAA,4.765319,1.512320,0,0
93,O2t,1.337,GCCCATTGAAGACTAGCAAGTTTCAATATATGGTTGCACG,ATGGAACAGAAATAA,21.799493,16.304781,0,0
94,PDH,1.315,GCCCATTGAGCACGAAGAGGACTTCTGATAGCGTTGCACG,ATGAAACTGGGGTAA,9.282533,7.058960,0,0
95,TF1,2.000,GCCCATTGACCATACCGGAAGGAGATAATAAAGTTGCACG,ATGGAGATGAAGTAA,9.000000,4.500000,0,Hill_Activator


#### Change Mutant


In [4]:
# Generating mutant
# The mutations are defined by hand: in the promoter of every target the motif
# 'ATTGA' (present in every promoter) is replaced by 'CCCCC', both in the gene
# table and in the genome sequence of the copied host.
myHost = deepcopy(wtHost)
# Mutants to be generated
Mutant_Targets = ['PFK','PGK','PGL','TF1','TF2']

for myMutant in Mutant_Targets:
    Mutant_Bool = myHost.strain.genes_df['RctID']==myMutant
    if not Mutant_Bool.any():
        # A target without a row in the gene table cannot be mutated.
        print(myMutant, 'not found in genes_df - skipped')
        continue
    # Scalar row label of the (first) matching row, so that .loc returns a plain string.
    Mutant_Indx = Mutant_Bool.index[Mutant_Bool.values][0]
    MTdf = myHost.strain.genes_df.copy()
    # Position of the wild-type promoter in the wild-type genome, for printing only.
    WTIndx = str(wtHost.strain.genome).find(wtHost.strain.genes_df.loc[Mutant_Indx, 'Promoter'])
    # Current promoter and its mutated counterpart.
    PromRef = MTdf.loc[Mutant_Indx, 'Promoter']
    PromTar = PromRef.replace('ATTGA','CCCCC')
    # Converting the Biopython Seq to str makes the replacement easier.
    MutGenome = str(myHost.strain.genome).replace(PromRef, PromTar)
    myHost.strain.genome = Seq(MutGenome)
    # Optional debug output of the affected genome window:
    # print('{}, Reference:\t{},\nMutated:\t{}\nwith index {}'.format(myMutant, wtHost.strain.genome[WTIndx:WTIndx+41], myHost.strain.genome[WTIndx:WTIndx+41], WTIndx))
    MTdf.loc[Mutant_Indx, 'Promoter'] = PromTar
    myHost.strain.genes_df = MTdf
    print(myMutant, WTIndx)

PFK 10
PGK 190
PGL 266
TF1 5716
TF2 5777


In [15]:
# Testing measure_EnzymeLevel1
# The call returns three objects: a per-reaction table (displayed below as `qw`)
# and two dicts of row indices (displayed below as `we` and `er`).
qw,we,er = measure_EnzymeLevel1('Ecol', wtHost.strain, myHost.strain)
# The 'RctFlag' column is kept as filter; it is passed to measure_RegulatorLevel as MT_Filt.
MT_Filt = qw['RctFlag']

In [16]:
qw # = RctNewDF

Unnamed: 0,RctFlag,RctID,RefExpr,NewExpr,RefFlux,Expr2Flux
0,True,PFK,1.880,1.682,7.477382,3.977331
1,False,PFL,4.883,4.883,0.000000,0.000000
2,False,PGI,1.541,1.541,4.860861,3.154355
3,True,PGK,1.446,1.214,-16.023526,-11.081277
4,True,PGL,1.044,1.103,4.959985,4.750943
...,...,...,...,...,...,...
92,False,NH4t,3.151,3.151,4.765319,1.512320
93,False,O2t,1.337,1.337,21.799493,16.304781
94,False,PDH,1.315,1.315,9.282533,7.058960
95,True,TF1,2.000,2.466,9.000000,4.500000


In [17]:
we # = EnzymeProm_Mutated

{'lower': array([ 4, 95,  3], dtype=int64),
 'upper': array([ 0, 96], dtype=int64)}

In [18]:
er 

{'increase': array([ 4, 95], dtype=int64),
 'decrease': array([ 0, 96,  3], dtype=int64)}

In [7]:
MTdf

Unnamed: 0,RctID,Expression,Promoter,ORF,Fluxes,Expr2Flux,TF_regulated,RegType
0,PFK,1.880,GCCCCCCCCAAGCCGAGATTAGGGTGTGTAGCCTTGCACG,ATGCTGGAACCGTGA,7.477382,3.977331,TF1,0
1,PFL,4.883,GCCCATTGACACACTGCGTATCGCTACTTACCATTGCACG,ATGTGGCGCACTTAA,0.000000,0.000000,0,0
2,PGI,1.541,GCCCATTGATCGCTCCGCGATGAGCGTTTAACTTTGCACG,ATGCCGGTGGAATAA,4.860861,3.154355,TF1,0
3,PGK,1.446,GCCCCCCCCAATTGCCTGCGGGTTGGCATAGGCTTGCACG,ATGGACGGGGTTTAA,-16.023526,-11.081277,0,0
4,PGL,1.044,GCCCCCCCCAGAGTACATATCGCTTGTGTATGGTTGCACG,ATGGACCATGTGTAA,4.959985,4.750943,TF2,0
...,...,...,...,...,...,...,...,...
92,NH4t,3.151,GCCCATTGACAGAGAGTTCCAAAGAGCATAATGTTGCACG,ATGCTGTATTTTTAA,4.765319,1.512320,0,0
93,O2t,1.337,GCCCATTGAAGACTAGCAAGTTTCAATATATGGTTGCACG,ATGGAACAGAAATAA,21.799493,16.304781,0,0
94,PDH,1.315,GCCCATTGAGCACGAAGAGGACTTCTGATAGCGTTGCACG,ATGAAACTGGGGTAA,9.282533,7.058960,0,0
95,TF1,2.000,GCCCCCCCCCCATACCGGAAGGAGATAATAAAGTTGCACG,ATGGAGATGAAGTAA,9.000000,4.500000,0,Hill_Activator


# measure_RegulatorLevel

In [45]:
def measure_RegulatorLevel(HostName:str, StrainWT:Strain, StrainMut:Strain, MT_Filt):
    '''
    Find the enzymes that are regulated by a mutated transcription factor (TF).

    Args:
        HostName: host identifier, handed on to Help_StrainCharacterizer.
        StrainWT: reference strain; its genes_df, genome and model are read.
        StrainMut: mutant strain; only its genome is read.
        MT_Filt: boolean filter over the rows of StrainWT.genes_df marking mutated rows.

    Returns:
        (RctNewDF, Set_Boundary, Expr_Change):
          RctNewDF     : table from Help_StrainCharacterizer whose 'RctFlag' column
                         now marks the enzymes regulated by a mutated TF,
          Set_Boundary : {'lower': indices, 'upper': indices} of flagged rows,
          Expr_Change  : {'increase': indices, 'decrease': indices} of flagged rows.
    '''
    from biolabsim.simulation.metabolism import Help_StrainCharacterizer

    wt_genes = StrainWT.genes_df
    wt_genome = str(StrainWT.genome)
    mut_genome = str(StrainMut.genome)
    wt_model = StrainWT.model

    # Regulator rows carry a non-zero RegType; keep those that are also mutated.
    is_regulator = wt_genes['RegType'] != 0
    mutated_regulators = wt_genes.loc[is_regulator & MT_Filt, 'RctID'].values
    # Rows whose TF_regulated entry names one of the mutated regulators.
    regulated_by_mutant = wt_genes.TF_regulated.isin(mutated_regulators)

    RctNewDF = Help_StrainCharacterizer(HostName, wt_genes, wt_genome, mut_genome, wt_model)
    # The flag column now means "regulated by a mutated TF" instead of
    # "own expression changed".
    RctNewDF['RctFlag'] = regulated_by_mutant

    flagged = RctNewDF.loc[RctNewDF['RctFlag'] == True].index.values
    ref_flux = RctNewDF.loc[flagged, 'RefFlux'].values
    expr_ratio = RctNewDF.loc[flagged, 'NewExpr'].values / RctNewDF.loc[flagged, 'RefExpr'].values

    # Direction of the reference flux and direction of the expression change.
    forward = flagged[ref_flux > 0]
    backward = flagged[ref_flux < 0]
    raised = flagged[expr_ratio > 1]
    lowered = flagged[expr_ratio < 1]

    # Four combinations; for negative fluxes the affected limits are exchanged:
    #   positive flux, increased expression -> lower bound
    #   positive flux, decreased expression -> upper bound
    #   negative flux, increased expression -> upper bound
    #   negative flux, decreased expression -> lower bound
    pos_inc = np.intersect1d(forward, raised)
    pos_dec = np.intersect1d(forward, lowered)
    neg_inc = np.intersect1d(backward, raised)
    neg_dec = np.intersect1d(backward, lowered)

    Expr_Change = {
        'increase': np.hstack([pos_inc, neg_inc]),
        'decrease': np.hstack([pos_dec, neg_dec]),
    }
    Set_Boundary = {
        'lower': np.hstack([pos_inc, neg_dec]),
        'upper': np.hstack([pos_dec, neg_inc]),
    }

    return RctNewDF, Set_Boundary, Expr_Change

In [47]:
# testing measure_RegulatorLevel
# MT_Filt is the 'RctFlag' column returned by measure_EnzymeLevel1 in the cell above.
# rt: per-reaction table, tz: boundary index dict, zu: expression-change index dict.
rt,tz,zu = measure_RegulatorLevel('Ecol', wtHost.strain, myHost.strain, MT_Filt)

In [48]:
rt

Unnamed: 0,RctFlag,RctID,RefExpr,NewExpr,RefFlux,Expr2Flux
0,True,PFK,1.880,1.682,7.477382,3.977331
1,False,PFL,4.883,4.883,0.000000,0.000000
2,True,PGI,1.541,1.541,4.860861,3.154355
3,False,PGK,1.446,1.214,-16.023526,-11.081277
4,True,PGL,1.044,1.103,4.959985,4.750943
...,...,...,...,...,...,...
92,False,NH4t,3.151,3.151,4.765319,1.512320
93,False,O2t,1.337,1.337,21.799493,16.304781
94,False,PDH,1.315,1.315,9.282533,7.058960
95,False,TF1,2.000,2.466,9.000000,4.500000


In [49]:
tz # index 2 (PGI) is not detected: PGI itself was not mutated

{'lower': array([4], dtype=int64), 'upper': array([0], dtype=int64)}

In [50]:
zu # index 2 (PGI) is not detected: PGI itself was not mutated

{'increase': array([4], dtype=int64), 'decrease': array([0], dtype=int64)}

In [22]:
# try out stuff for measure_RegulatorLevel()
# The same filtering steps as inside measure_RegulatorLevel, executed one by one
# on the mutant gene table (MTdf) instead of the wild-type table.
GenesDF=MTdf
TF_Indx = (GenesDF['RegType'] != 0) # Filter for TFs
MT_TF = TF_Indx & MT_Filt # TFs with mutations 
    
MT_TFnames = GenesDF.loc[MT_TF, 'RctID'].values # names of mutated TFs (array)
MT_TFenzymes = GenesDF.TF_regulated.isin(MT_TFnames) # Enzymes regulated by mt TFs # RctNewDF =

# Show the regulator names, then display the boolean enzyme filter.
print(MT_TFnames)
MT_TFenzymes


['TF1' 'TF2']


0      True
1     False
2      True
3     False
4      True
      ...  
92    False
93    False
94    False
95    False
96    False
Name: TF_regulated, Length: 97, dtype: bool

In [None]:
# Finding reactions to change. Three possibilities exist: 1. the promoter of the enzyme itself has changed, 2. the promoter of a regulator has changed, 3. both the enzyme promoter and the regulator promoter have changed
# redefining Help_FluxCalculator in simulation/metabolism
def Help_FluxCalculator ( HostName:str, StrainWT:Strain, StrainMut:Optional[Strain] = None ) :
    '''
    Calculation of flux values.

    This method can work in 2 modes:
      [StrainWT only] : The metabolic model of the WT strain is optimized as it is.
      [StrainWT + StrainMut] : Flux boundaries are reset first, for reactions whose
        own promoter changed (measure_EnzymeLevel1) and for reactions regulated by a
        regulator whose promoter changed (measure_RegulatorLevel); then the model
        is optimized.

    Args:
        HostName: host identifier, handed on to the measurement functions.
        StrainWT: reference strain; its model is the one that is optimized.
        StrainMut: optional mutant strain.

    Returns:
        (flux values, objective value) of the optimization result.

    Note: measure_EnzymeLevel1 and measure_RegulatorLevel are taken from the
      notebook namespace. When this function is moved to simulation/metabolism,
      both must be imported there.

    TODO: The "reset boundary" step is mutating the `StrainMut.model`, mutation might not be intended.
      Because of this, perhaps the other `StrainWT.model` gets mutated in the process.
    '''
    # Reference mode: nothing to reset.
    if StrainMut is None :
        Fluxes = StrainWT.model.optimize()
        return Fluxes.fluxes.values, Fluxes.objective_value

    print('resetting boundaries')
    # Reactions whose own expression has changed, with the new expression level.
    RctNewDF, EnzymeProm_Mutated, _ = measure_EnzymeLevel1(HostName, StrainWT, StrainMut)
    # Reactions regulated by a mutated regulator. The 'RctFlag' column of the
    # enzyme-level table is the mutation filter that measure_RegulatorLevel requires.
    # RctNewDF of the first call is kept, the second table is not needed here.
    _, Regulator_Mutated, _ = measure_RegulatorLevel(HostName, StrainWT, StrainMut, RctNewDF['RctFlag'])

    # lower bound: increased forward or decreased reverse reaction
    # upper bound: decreased forward or increased reverse reaction
    with StrainWT.model as myModel:
        # NOTE(review): the index sets may contain regulator rows (appended to
        # genes_df by add_TranscriptionFactor) which presumably have no reaction in
        # the model; such indices are skipped. Assumes the row order of the table
        # matches the order of model.reactions, as the positional lookup already does.
        n_reactions = len(myModel.reactions)
        for Boundary_Set in (EnzymeProm_Mutated, Regulator_Mutated):
            for Indx in Boundary_Set['lower']:
                if Indx < n_reactions:
                    myModel.reactions[Indx].lower_bound = RctNewDF.loc[Indx, 'NewExpr'] * RctNewDF.loc[Indx, 'Expr2Flux']
            for Indx in Boundary_Set['upper']:
                if Indx < n_reactions:
                    myModel.reactions[Indx].upper_bound = RctNewDF.loc[Indx, 'NewExpr'] * RctNewDF.loc[Indx, 'Expr2Flux']

        Fluxes = myModel.optimize()

    return Fluxes.fluxes.values, Fluxes.objective_value

### Regulationsgleichung

In [None]:
# in Help_FluxCalculator:
# Sketch of the planned change, not runnable as a standalone cell: inside the
# `with ... as myModel` block of Help_FluxCalculator the bounds are to be taken
# from FluxEquation instead of NewExpr * Expr2Flux.
# NOTE(review): Set_Boundary and myModel are not defined at notebook level, and
# FluxEquation is only defined further down in this cell.

for Indx in Set_Boundary['lower']:
                myModel.reactions[Indx].lower_bound = FluxEquation(Indx, wtHost.strain.genes_df, myHost.strain.genes_df)
for Indx in Set_Boundary['upper']:
                myModel.reactions[Indx].upper_bound = FluxEquation(Indx, wtHost.strain.genes_df, myHost.strain.genes_df)


# (equationtype = myEquations[RctNewDF.loc[Indx, 'Expr2Flux']])


def FluxEquation(index, WTdf, MTdf, n=1.95):
    '''
    Return the value used as flux bound for one row, depending on whether the
    protein is regulated by a transcription factor (TF) or not.

    Unregulated rows (TF_regulated == 0) use the linear relation
        mutant expression * Expr2Flux (wild type).
    Regulated rows use a Hill equation in the mutant level of their regulator:
        activator: MaxExpr / (1 + (K / cTF)**n)
        repressor: MaxExpr * (1 - 1 / (1 + (K / cTF)**n))
    with MaxExpr = 2 * mutant expression of the row, so that at the physiological
    (wild-type) regulator level cTF == K the result equals the mutant expression.

    Args:
        index: row label of the reaction in both tables.
        WTdf: wild-type gene table with RctID, Expression, Expr2Flux,
            TF_regulated and RegType.
        MTdf: mutant table; the expression is read from 'NewExpr' if that column
            exists, otherwise from 'Expression'.
        n: Hill coefficient. Default 1.95 from Kim, Harold D.; O'Shea, Erin K.
            (2008): A quantitative model of transcription factor-activated gene
            expression. DOI: 10.1038/nsmb.1500.

    Returns:
        The computed value as a number.

    Raises:
        ValueError: if the regulator named in TF_regulated has no row in WTdf, or
            if its RegType is neither 'Hill_Activator' nor 'Hill_Repressor'.

    NOTE(review): the Hill branch returns a value in expression units, the linear
    branch multiplies by Expr2Flux - confirm whether the Hill result should be
    scaled by Expr2Flux as well.
    '''
    # A genes_df stores the level as 'Expression', a characterizer table as 'NewExpr'.
    expr_col = 'NewExpr' if 'NewExpr' in MTdf.columns else 'Expression'
    myTF = WTdf.loc[index, 'TF_regulated']

    # linear
    if myTF == 0:
        Corr_ExprFlux = WTdf.loc[index, 'Expr2Flux']
        return MTdf.loc[index, expr_col] * Corr_ExprFlux

    # Hill
    reg_rows = WTdf.index[(WTdf['RctID'] == myTF).values]
    if len(reg_rows) == 0:
        raise ValueError('regulator {!r} of row {!r} not found in WTdf'.format(myTF, index))
    RegIndx = reg_rows[0]
    RegType = WTdf.loc[RegIndx, 'RegType']

    MaxExpr = 2 * MTdf.loc[index, expr_col]
    # Regulator level in the mutant; depends on the (possibly changed) TF promoter.
    cTF = float(MTdf.loc[RegIndx, expr_col])
    # Physiological regulator level: wild-type value, not changed by a promoter mutation.
    K = float(WTdf.loc[RegIndx, 'Expression'])

    # Without any regulator the activation term vanishes; avoids a division by zero.
    ratio = K / cTF if cTF != 0 else float('inf')
    activation = 1.0 / (1.0 + ratio ** n)

    if RegType == 'Hill_Activator':
        return MaxExpr * activation
    if RegType == 'Hill_Repressor':
        return MaxExpr * (1 - activation)
    raise ValueError('unknown RegType {!r} for regulator {!r}'.format(RegType, myTF))

### Vergleich von WT und MT aus MetEngSim

In [None]:
wtHost.strain.genes_df

In [None]:
myHost.strain.genes_df

In [None]:
# Element-wise ratio mutant / wild type of the 'Expression' column,
# rounded to two decimals and displayed as the cell result.
mylist = np.array(wtHost.strain.genes_df['Expression'])
mylist2 = np.array(myHost.strain.genes_df['Expression'])
print('Gene expression differences')
np.round(mylist2/mylist,2)

In [None]:
# Element-wise ratio mutant / wild type of the 'Expr2Flux' column,
# rounded to two decimals and displayed as the cell result.
myl = np.array(wtHost.strain.genes_df['Expr2Flux'])
myl2 = np.array(myHost.strain.genes_df['Expr2Flux'])
# myHost.strain.genes_df
print('Exp2Flux diff')
np.round(myl2/myl,2)

In [None]:
# Element-wise ratio mutant / wild type of the 'Fluxes' column.
FluxDiff = myHost.strain.genes_df['Fluxes']/wtHost.strain.genes_df['Fluxes']
print('Flux reaction differences')
# NaN and +/-inf entries in the ratio are replaced by 1 before rounding to two decimals.
np.round(np.nan_to_num(FluxDiff, nan=1, posinf=1, neginf=1),2)