### Setup aus MetEngSim

In [1]:
import sys # loading commands to control/navigate within the system architecture
# Loading pandas, a library for data manipulation
from os.path import join
# import xlrd
import pandas as pd
# import lxml

# Loading numpy, a library for the manipulation of numbers
import numpy as np

# loading matplotlib, a library for visualization
import matplotlib.pyplot as plt
%matplotlib inline

# loading cobrapy, a library dedicated to the analysis of genome scale metabolic models
# from cobra.io import read_sbml_model, write_sbml_model, load_matlab_model

# loading escher for metabolic network visualization
# import escher
# from escher import Builder
# from time import sleep
# escher.rc['never_ask_before_quit'] = True
# list of available maps
# print(escher.list_available_maps())

# loading Memote, quality assessment of GSMM
# from memote import test_model, snapshot_report

from biolabsim import Host, Strain, Ecol
from biolabsim import measure_EnzymeLevel1, Help_GenomeGenerator, Help_PromoterStrength
from Bio.Seq import Seq
from copy import deepcopy

print('System ready')

System ready


### Change Setting
#### Add Transkriptionsfaktor

In [2]:
def add_TranscriptionFactor(df, TFname1:str, TFname2:str):
    """Return a new gene table extended by two transcription factors (TFs).

    Two rows are appended, one per TF, and two columns are added:

    * ``TF_regulated`` - ``0`` for unregulated genes, otherwise the ``RctID``
      of the regulating TF.
    * ``RegType`` - ``0`` for ordinary enzymes, ``'Hill_Activator'`` for
      ``TFname1`` and ``'Hill_Repressor'`` for ``TFname2``.

    The regulation pattern is currently fixed: PFK and PGI are regulated by
    ``TFname1``, PGL by ``TFname2``.

    Parameters
    ----------
    df : pandas.DataFrame
        Gene table with at least the columns ``RctID``, ``Expression``,
        ``Promoter``, ``ORF``, ``Fluxes`` and ``Expr2Flux``. It is not modified.
    TFname1, TFname2 : str
        ``RctID`` of the activator and of the repressor, respectively.

    Returns
    -------
    pandas.DataFrame
        New table with the two TF rows and the two regulation columns.
    """
    # TODO: promoter/ORF sequences, fluxes and Expr2Flux of the TFs are
    # placeholders (real sequence, random sequence or helper function?).
    Promoter1 = 'GCCCATTGACAAGGCTCTCGCGGCCAGGTATAATTGCACG'
    Promoter2 = 'GCCCATTGACAAGGCTCTCGCGGCCAGGTATAATTGCATT'

    TF_rows = pd.DataFrame({
        'RctID': [TFname1, TFname2],
        'Expression': [Help_PromoterStrength('Ecol', Promoter1),
                       Help_PromoterStrength('Ecol', Promoter2)],
        'Promoter': [Promoter1, Promoter2],
        'ORF': ['ATGGAGATGAAGTAA', 'ATGGAGATGAAGTCC'],
        'Fluxes': [9, 6],          # TODO: derive with Help_FluxCalculator()?
        'Expr2Flux': [4.5, 2],     # TODO: derive with Help_Expr2Flux()?
    })
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    newdf = pd.concat([df, TF_rows], ignore_index=True)

    # The regulation columns hold the integer 0 or a string, so they are
    # created with object dtype; writing strings into an int column is an
    # incompatible-dtype assignment in recent pandas versions.
    newdf = newdf.assign(
        TF_regulated=np.zeros(len(newdf), dtype=object),
        RegType=np.zeros(len(newdf), dtype=object),
    )

    # Mark the TF rows with their regulation type.
    newdf.loc[newdf['RctID'] == TFname1, 'RegType'] = 'Hill_Activator'
    newdf.loc[newdf['RctID'] == TFname2, 'RegType'] = 'Hill_Repressor'

    # Fixed regulation pattern. Future: detect the available substrates and
    # decide from them which TF activates or represses which enzyme.
    for enzyme, regulator in (('PFK', TFname1), ('PGI', TFname1), ('PGL', TFname2)):
        newdf.loc[newdf['RctID'] == enzyme, 'TF_regulated'] = regulator
    return newdf

#### Change Wildtype

In [3]:
# Create the wild-type host and report its objective value as growth rate.
wtHost = Ecol()
print('Wild type growth rate: {:.2f}'.format(wtHost.strain.objective))

# Add the two regulator rows (TF1, TF2) and the regulation columns to the
# wild-type gene table and store the extended table back on the strain.
WTdf = wtHost.strain.genes_df
WTdf = add_TranscriptionFactor(WTdf, 'TF1', 'TF2')
wtHost.strain.genes_df = WTdf
# Regenerate the genome from the extended gene table so that it also contains
# the TF genes.
# NOTE(review): the meaning of the arguments 500 and .6 is defined by
# Help_GenomeGenerator and is not visible here - confirm and name them.
WTGenome_new = Help_GenomeGenerator(WTdf, 500, .6)
wtHost.strain.genome = Seq(WTGenome_new)
# Display the extended wild-type gene table.
wtHost.strain.genes_df



Wild type growth rate: 0.87




Unnamed: 0,RctID,Expression,Promoter,ORF,Fluxes,Expr2Flux,TF_regulated,RegType
0,PFK,1.832,GCCCATTGATTCCGACGGCGGGCACTGGTACAATTGCACG,ATGACCGGCTTGTGA,7.477382,4.081540,TF1,0
1,PFL,3.470,GCCCATTGACCTATCGCCCCTCGCCCTATACAATTGCACG,ATGGACTTTAAGTGA,0.000000,0.000000,0,0
2,PGI,0.438,GCCCATTGAGTACGTAGTGCGCACGACGTAGGGTTGCACG,ATGCTGGTAGGATAA,4.860861,11.097856,TF1,0
3,PGK,1.134,GCCCATTGAGCATCATGTACCACGCCTGTAGTGTTGCACG,ATGCCCAATGAATGA,-16.023526,-14.130094,0,0
4,PGL,1.226,GCCCATTGAAGGAGGTGAAGGGTTAGAATAGTCTTGCACG,ATGGGCGATTCATAA,4.959985,4.045665,TF2,0
...,...,...,...,...,...,...,...,...
92,NH4t,1.835,GCCCATTGACCGCGGGAGGTAAATGTTCTAAGATTGCACG,ATGTTCCAGCTGTAA,4.765319,2.596904,0,0
93,O2t,0.991,GCCCATTGAGCCGCGCGGTTATTGGAAATACTTTTGCACG,ATGATTAATACGTGA,21.799493,21.997470,0,0
94,PDH,0.735,GCCCATTGATATCCGTCCGTCTCTCCAATAGCCTTGCACG,ATGCCTCTAATGTAA,9.282533,12.629296,0,0
95,TF1,5.733,GCCCATTGACAAGGCTCTCGCGGCCAGGTATAATTGCACG,ATGGAGATGAAGTAA,9.000000,4.500000,0,Hill_Activator


#### Change Mutant


In [4]:
# Generating the mutant.
# The mutations are defined by hand: for every target the promoter motif
# 'ATTGA' is replaced by 'CCCCC', both in the genome and in the gene table.
# The mutant starts as a deep copy, so wtHost itself is not modified here.
myHost = deepcopy(wtHost)
# Reactions / regulators whose promoter is mutated
Mutant_Targets = ['PFK','PGK','PGL','TF1']

for myMutant in Mutant_Targets:
    # Boolean mask and positional index (as tuple) of the target row.
    # NOTE(review): the .loc lookups below are used as if they return a single
    # string; this presumes exactly one matching row and a default RangeIndex
    # (position == label) - confirm.
    Mutant_Bool = myHost.strain.genes_df['RctID']==myMutant
    Mutant_Indx = tuple(np.arange(len(Mutant_Bool))[Mutant_Bool])
    MTdf = myHost.strain.genes_df.copy()
    # Position of the wild-type promoter in the wild-type genome, for printing
    WTIndx = str(wtHost.strain.genome).find(wtHost.strain.genes_df.loc[Mutant_Indx, 'Promoter'])
    # Generating the target promoter sequence; the motif ATTGA is expected in every promoter
    PromTar = MTdf.loc[Mutant_Indx, 'Promoter'].replace('ATTGA','CCCCC')
    # Converting the Biopython Seq object to a string makes string replacement easier
    MutGenome = str(myHost.strain.genome)
    MutGenome = MutGenome.replace(MTdf.loc[Mutant_Indx, 'Promoter'], PromTar)
    myHost.strain.genome = Seq(MutGenome)
#     print('{}, Reference:\t{},\nMutated:\t{}\nwith index {}'.format(myMutant, wtHost.strain.genome[Mutant_Indx:Mutant_Indx+41], myHost.strain.genome[Mutant_Indx:Mutant_Indx+41], Mutant_Indx))
    # Keep the gene table in sync with the mutated genome
    MTdf.loc[Mutant_Indx, 'Promoter'] = PromTar
    myHost.strain.genes_df = MTdf
    print(myMutant, WTIndx)

PFK 0
PGK 183
PGL 239
TF1 5718


In [5]:
# Testing measure_EnzymeLevel1 on the wild type vs. the mutant.
# qw: per-reaction table (RctNewDF); we: EnzymeProm_Mutated;
# er: third return value (NOTE(review): meaning not visible here - confirm).
qw,we,er = measure_EnzymeLevel1('Ecol', wtHost.strain, myHost.strain)
# Boolean column flagging the reactions reported as changed; used as input
# for measure_RegulatorLevel further below.
MT_Filt = qw['RctFlag']

In [6]:
qw # = RctNewDF

Unnamed: 0,RctFlag,RctID,RefExpr,NewExpr,RefFlux,Expr2Flux
0,True,PFK,1.832,1.832,7.477382,4.081540
1,False,PFL,3.470,3.470,0.000000,0.000000
2,False,PGI,0.438,0.438,4.860861,11.097856
3,True,PGK,1.134,1.134,-16.023526,-14.130094
4,True,PGL,1.226,1.226,4.959985,4.045665
...,...,...,...,...,...,...
92,False,NH4t,1.835,1.835,4.765319,2.596904
93,False,O2t,0.991,0.991,21.799493,21.997470
94,False,PDH,0.735,0.735,9.282533,12.629296
95,True,TF1,5.733,5.733,9.000000,4.500000


In [None]:
we # = EnzymeProm_Mutated

In [None]:
er 

In [None]:
MTdf

# measure_RegulatorLevel

In [None]:
def measure_RegulatorLevel(HostName:str, StrainWT:Strain, StrainMut:Strain, MT_Filt):
    """Flag reactions that are mutated themselves or regulated by a mutated TF.

    Parameters
    ----------
    HostName : str
        Host identifier forwarded to ``Help_StrainCharacterizer``.
    StrainWT, StrainMut : Strain
        Reference and mutant strain; gene table, genome and model are read
        from the reference, only the genome from the mutant.
    MT_Filt : boolean pandas.Series
        Flags of the mutated reactions (``RctFlag`` column returned by
        ``measure_EnzymeLevel1``).

    Returns
    -------
    pandas.DataFrame
        Result of ``Help_StrainCharacterizer`` whose ``RctFlag`` column is
        overwritten with "mutated OR regulated by a mutated TF".

    Notes
    -----
    Only the table is returned. Sorting the flagged reactions into boundary
    changes (positive/negative reference flux combined with increased/decreased
    expression) was drafted here earlier but is not performed by this function.
    """
    from biolabsim.simulation.metabolism import Help_StrainCharacterizer

    wt_genes = StrainWT.genes_df
    wt_genome = str(StrainWT.genome)
    mut_genome = str(StrainMut.genome)
    wt_model = StrainWT.model

    # Rows with a non-zero RegType are transcription factors.
    is_tf = wt_genes['RegType'] != 0
    # Names of the TFs whose own promoter carries a mutation.
    mutated_tf_names = wt_genes.loc[is_tf & MT_Filt, 'RctID'].values
    # Enzymes whose regulating TF is among the mutated ones.
    regulated_by_mutated_tf = wt_genes['TF_regulated'].isin(mutated_tf_names)
    # Everything that is affected directly or through its regulator.
    affected = MT_Filt | regulated_by_mutated_tf

    RctNewDF = Help_StrainCharacterizer(HostName, wt_genes, wt_genome, mut_genome, wt_model)
    # Replace the "expression changed" flag by the wider "affected" flag.
    RctNewDF['RctFlag'] = affected

    return RctNewDF

In [None]:
# Testing measure_RegulatorLevel: rt is the per-reaction table whose RctFlag
# column marks reactions that are mutated or regulated by a mutated TF.
rt= measure_RegulatorLevel('Ecol', wtHost.strain, myHost.strain, MT_Filt)

In [None]:
rt

In [None]:
# tz  # disabled: `tz` is never assigned (measure_RegulatorLevel returns only one DataFrame), so this cell raised NameError

In [None]:
# zu  # disabled: `zu` is never assigned (measure_RegulatorLevel returns only one DataFrame), so this cell raised NameError

In [None]:
# try out stuff for measure_RegulatorLevel()
# GenesDF=MTdf
# TF_Indx = (GenesDF['RegType'] != 0) # Filter for TFs
# MT_TF = TF_Indx & MT_Filt # TFs with mutations 
    
# MT_TFnames = GenesDF.loc[MT_TF, 'RctID'].values # names of mutated TFs (array)
# MT_TFenzymes = GenesDF.TF_regulated.isin(MT_TFnames) # Enzymes regulated by mt TFs # RctNewDF =

# print(MT_TFnames)
# MT_TFenzymes


### Regulationsgleichung

In [None]:
# used in Help_FluxCalculator()

def FluxEquation(index, WTdf, TFNewDF):
    """Return the flux bound of one reaction, depending on its regulation.

    * Unregulated reaction (``TF_regulated == 0``): linear relation
      ``NewExpr * Expr2Flux``.
    * Regulated reaction: Hill equation after Kim & O'Shea (2008),
      "A quantitative model of transcription factor-activated gene
      expression", DOI 10.1038/nsmb.1500, using the new expression level of
      the regulating TF as its concentration.

    Parameters
    ----------
    index
        Row label of the reaction; must be valid in both tables.
    WTdf : pandas.DataFrame
        Reference gene table with ``RctID``, ``TF_regulated`` and ``RegType``.
    TFNewDF : pandas.DataFrame
        Table with ``RefExpr``, ``NewExpr`` and ``Expr2Flux``, indexed like
        ``WTdf``.

    Returns
    -------
    float
        The calculated value.

    Raises
    ------
    ValueError
        If the regulating TF is not listed in ``WTdf`` or its ``RegType`` is
        neither ``'Hill_Activator'`` nor ``'Hill_Repressor'``.
    """
    regulator = WTdf.loc[index, 'TF_regulated']

    if regulator == 0:
        # Linear equation for unregulated reactions.
        return TFNewDF.loc[index, 'NewExpr'] * TFNewDF.loc[index, 'Expr2Flux']

    # Locate the row of the regulating TF (the first match is used).
    reg_rows = WTdf.index[WTdf['RctID'] == regulator]
    if len(reg_rows) == 0:
        raise ValueError("regulator {!r} of row {!r} not found in 'RctID'".format(regulator, index))
    reg_index = reg_rows[0]

    # Chosen so that the result equals NewExpr when K/cTF == 1.
    MaxExpr = 2 * TFNewDF.loc[index, 'NewExpr']
    # TF level, depends on the (possibly mutated) TF promoter.
    cTF = TFNewDF.loc[reg_index, 'NewExpr']
    # NOTE(review): K is read from the enzyme row, as before. The original
    # remark describes K as the physiological TF level, which would be the
    # regulator's RefExpr (reg_index) - confirm which is intended.
    K = TFNewDF.loc[index, 'RefExpr']
    n = 1.95  # Hill coefficient from Kim & O'Shea (2008)
    # NOTE(review): unlike the linear branch, this branch is not multiplied by
    # Expr2Flux - confirm that the result is meant to be in flux units.

    bound_fraction = 1 / (1 + (K / cTF) ** n)
    reg_type = WTdf.loc[reg_index, 'RegType']
    if reg_type == 'Hill_Activator':
        return MaxExpr * bound_fraction
    if reg_type == 'Hill_Repressor':
        return MaxExpr * (1 - bound_fraction)
    raise ValueError("unknown RegType {!r} for regulator {!r}".format(reg_type, regulator))

In [None]:
def setboundary(RctNewDF, StrainWT:Strain):
    """Return which flagged reactions get a new lower or upper flux bound.

    Only rows with a true ``RctFlag`` are considered. They are classified by
    the sign of ``RefFlux`` and by the expression ratio ``NewExpr / RefExpr``:

    * ``'lower'``: positive flux with increased expression, or negative flux
      with decreased expression.
    * ``'upper'``: positive flux with decreased expression, or negative flux
      with increased expression.

    Rows with zero reference flux or an expression ratio of exactly 1 (or an
    undefined ratio) appear in neither list.
    TODO: reactions that are only affected through a mutated TF keep a ratio
    of 1 and are therefore not classified yet.

    Parameters
    ----------
    RctNewDF : pandas.DataFrame
        Table with the columns ``RctFlag``, ``RefExpr``, ``NewExpr`` and
        ``RefFlux``.
    StrainWT : Strain
        Currently unused; kept for interface compatibility.

    Returns
    -------
    dict
        ``{'lower': [...], 'upper': [...]}`` with the row labels of
        ``RctNewDF`` as plain Python lists.
    """
    flagged = RctNewDF.loc[RctNewDF['RctFlag'].astype(bool)]
    expr_ratio = flagged['NewExpr'] / flagged['RefExpr']

    flux_pos = flagged['RefFlux'] > 0
    flux_neg = flagged['RefFlux'] < 0
    expr_inc = expr_ratio > 1
    expr_dec = expr_ratio < 1

    raise_lower = (flux_pos & expr_inc) | (flux_neg & expr_dec)
    reduce_upper = (flux_pos & expr_dec) | (flux_neg & expr_inc)

    Set_Boundary = {'lower': flagged.index[raise_lower.to_numpy()].tolist(),
                    'upper': flagged.index[reduce_upper.to_numpy()].tolist()}
    return Set_Boundary

In [None]:
# Finding reactions to change. Three possibilities exist: 1. the promoter of the enzyme itself has changed, 2. the promoter of a regulator has changed,
# 3. both the enzyme promoter and the regulator promoter have changed --> found with rt['RctFlag'] from measure_RegulatorLevel()
# Redefining Help_FluxCalculator from simulation/metabolism
def Help_FluxCalculator ( HostName:str, StrainWT:Strain, StrainMut = None ) :
    '''
    Calculation of flux values.

    This method can work in 2 modes:
      [StrainWT only] : The metabolic model of the WT strain is used for calculation.
      [StrainWT + StrainMut] : An additional step of resetting boundaries is done on the model
        before the fluxes are calculated.

    Reactions to change are those whose own promoter changed, whose regulator's
    promoter changed, or both; they are taken from the 'RctFlag' column of
    measure_RegulatorLevel(). The new bounds are computed with FluxEquation().

    Returns a tuple (array of flux values, objective value) of the optimization.

    TODO: The "reset boundary" step edits the bounds of `StrainWT.model` inside a
      `with` block. Confirm that the model's context manager reverts these edits
      on exit, otherwise the WT model stays modified.
    '''
    # measure_EnzymeLevel1 comes from the notebook-level biolabsim import; the
    # package-relative import used in simulation/metabolism cannot be resolved
    # inside a notebook.

    if StrainMut is None :
        # Reference mode: no boundary changes.
        Fluxes = StrainWT.model.optimize()
        return Fluxes.fluxes.values, Fluxes.objective_value

    # Mutant mode: reactions with altered promoter sequence change their enzyme
    # level, so their boundaries must be changed accordingly.
    print('resetting boundaries')
    # finding reactions for which the expression has changed, and finding the new level
    RctNewDF, _, _ = measure_EnzymeLevel1(HostName, StrainWT, StrainMut)
    # finding regulators whose promoter has changed; AllNewDF flags mutated
    # reactions as well as reactions regulated by a mutated TF
    MT_Filt = RctNewDF['RctFlag']
    AllNewDF = measure_RegulatorLevel(HostName, StrainWT, StrainMut, MT_Filt)

    Set_Boundary = setboundary(AllNewDF, StrainWT)

    # Defining the model with the two combinations of either
    # increasing lower bound (increased forward, decreased reverse reaction)
    # decreasing upper bound (decreased forward, increased reverse reaction)
    with StrainWT.model as myModel:
        for Indx in Set_Boundary['lower']:
            myModel.reactions[Indx].lower_bound = FluxEquation(Indx, StrainWT.genes_df, AllNewDF)
        for Indx in Set_Boundary['upper']:
            myModel.reactions[Indx].upper_bound = FluxEquation(Indx, StrainWT.genes_df, AllNewDF)

        Fluxes = myModel.optimize()

    return Fluxes.fluxes.values, Fluxes.objective_value

In [None]:
# Calculate the mutant fluxes: flux value array and objective value of the optimization.
FluxValues, FluxObjValue = Help_FluxCalculator ( 'Ecol', wtHost.strain, myHost.strain )

### Vergleich von WT und MT aus MetEngSim

In [None]:
wtHost.strain.genes_df

In [None]:
myHost.strain.genes_df

In [None]:
# Element-wise ratio mutant / wild type of the 'Expression' column, rounded to
# two decimals (1.0 means unchanged).
mylist = np.array(wtHost.strain.genes_df['Expression'])
mylist2 = np.array(myHost.strain.genes_df['Expression'])
print('Gene expression differences')
np.round(mylist2/mylist,2)

In [None]:
# Element-wise ratio mutant / wild type of the 'Expr2Flux' column.
myl = np.array(wtHost.strain.genes_df['Expr2Flux'])
myl2 = np.array(myHost.strain.genes_df['Expr2Flux'])
print('Exp2Flux diff')
# Expr2Flux is 0 for reactions without flux (e.g. PFL), so 0/0 would give NaN
# and a RuntimeWarning. As in the flux comparison, undefined ratios are shown
# as 1 (no change).
with np.errstate(divide='ignore', invalid='ignore'):
    Expr2FluxDiff = myl2/myl
np.round(np.nan_to_num(Expr2FluxDiff, nan=1, posinf=1, neginf=1),2)

In [None]:
# Element-wise ratio mutant / wild type of the 'Fluxes' column.
FluxDiff = myHost.strain.genes_df['Fluxes']/wtHost.strain.genes_df['Fluxes']
print('Flux reaction differences')
# Undefined ratios (NaN and +/-inf from division by zero) are replaced by 1
# before rounding to two decimals.
np.round(np.nan_to_num(FluxDiff, nan=1, posinf=1, neginf=1),2)