### Setup aus MetEngSim

In [1]:
import sys # loading commands to control/navigate within the system architecture
# Loading pandas, a library for data manipulation
from os.path import join
# import xlrd
import pandas as pd
# import lxml

# Loading numpy, a library for the manipulation of numbers
import numpy as np

# loading matplotlib, a library for visualization
import matplotlib.pyplot as plt
%matplotlib inline

# loading cobrapy, a library dedicated to the analysis of genome scale metabolic models
# from cobra.io import read_sbml_model, write_sbml_model, load_matlab_model

# loading escher for metabolic network visualization
# import escher
# from escher import Builder
# from time import sleep
# escher.rc['never_ask_before_quit'] = True
# list of available maps
# print(escher.list_available_maps())

# loading Memote, quality assessment of GSMM
# from memote import test_model, snapshot_report

from biolabsim import Host, Strain, Ecol
from biolabsim import measure_EnzymeLevel1, Help_GenomeGenerator
from Bio.Seq import Seq
from copy import deepcopy

print('System ready')

System ready


### Change Setting
#### Add transcription factor

In [2]:
def add_TranscriptionFactor(df):
    '''
    Return a copy of the gene table extended by a transcription factor (TF).

    A row with RctID 'TF' is appended and two columns are added:
      TF_regulated : 0 for unregulated genes, otherwise the RctID of the regulating TF.
      RegType      : 0 for ordinary genes, otherwise the regulation type of a TF row.

    The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Gene table with at least the columns 'RctID', 'Expression', 'Promoter',
        'ORF', 'Fluxes' and 'Expr2Flux'.

    Returns
    -------
    pandas.DataFrame
        New frame with a fresh 0..n index, the TF row last, and the two
        regulation columns.
    '''
    # TF row. All values are placeholders for now:
    tf_row = pd.DataFrame({
        'RctID': ['TF'],
        'Expression': [2],  # TODO: derive with Help_PromoterStrength()?
        'Promoter': ['GCCCATTGACCATACCGGAAGGAGATAATAAAGTTGCACG'],  # TODO: real, random or generated?
        'ORF': ['ATGGAGATGAAGTAA'],  # TODO: real, random or generated?
        'Fluxes': [9],  # TODO: derive with Help_FluxCalculator()?
        'Expr2Flux': [4.5],  # TODO: derive with Help_Expr2Flux()?
    })
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
    newdf = pd.concat([df, tf_row], ignore_index=True)

    # Both regulation columns hold a mix of the integer 0 and strings, so they are
    # created with object dtype up front instead of relying on an implicit
    # int -> object upcast when the first string is written.
    no_regulation = np.zeros(len(newdf), dtype=int).astype(object)
    newdf = newdf.assign(TF_regulated=no_regulation.copy(), RegType=no_regulation.copy())

    # The TF itself currently always acts as an activator.
    # Future: recognise the available substrates and choose activator or repressor.
    newdf.loc[newdf['RctID'] == 'TF', 'RegType'] = 'Hill_Activator'

    # For now every regulated protein is regulated by the single TF.
    for regulated_id in ('PFK', 'PGI'):
        newdf.loc[newdf['RctID'] == regulated_id, 'TF_regulated'] = 'TF'
    return newdf

#### Change Wildtype

In [3]:
# Create the wild-type host and report its growth rate.
wtHost = Ecol()
print(f'Wild type growth rate: {wtHost.strain.objective:.2f}')

# Extend the wild-type gene table with the regulator TF and store it back on the strain.
WTdf = add_TranscriptionFactor(wtHost.strain.genes_df)
wtHost.strain.genes_df = WTdf

# Regenerate the genome from the extended gene table so that it also contains the TF gene.
# The two numeric arguments are passed to Help_GenomeGenerator unchanged; their meaning
# is not visible in this notebook.
WTGenome_new = Help_GenomeGenerator(WTdf, 500, .6)
wtHost.strain.genome = Seq(WTGenome_new)

wtHost.strain.genes_df



Wild type growth rate: 0.87


Unnamed: 0,RctID,Expression,Promoter,ORF,Fluxes,Expr2Flux,TF_regulated,RegType
0,PFK,1.670,GCCCATTGAACTCGCATGAAATGGTGCTTACGGTTGCACG,ATGAAACAACGCTAA,7.477382,4.477474,TF,0
1,PFL,2.971,GCCCATTGACTGTAACTCTCACTGCCGTTAACTTTGCACG,ATGAACCGAGGCTAG,0.000000,0.000000,0,0
2,PGI,1.149,GCCCATTGAATGACGTTTACCAGGAGGCTACGATTGCACG,ATGGAAATCATTTAA,4.860861,4.230514,TF,0
3,PGK,1.791,GCCCATTGAGAGCGCATCGTAACCAGGATATGTTTGCACG,ATGCATGAAGATTAA,-16.023526,-8.946692,0,0
4,PGL,2.477,GCCCATTGACCGAAAGAAGACCAAAGGGTACCTTTGCACG,ATGGGAGGACAGTGA,4.959985,2.002416,0,0
...,...,...,...,...,...,...,...,...
91,NADTRHD,1.420,GCCCATTGATTAATGGGACAAACTTTGTTACGATTGCACG,ATGTTTTATATGTAA,0.000000,0.000000,0,0
92,NH4t,1.363,GCCCATTGATACGTCTATGGCCAGCGGTTAACCTTGCACG,ATGTAAGGTAAATAA,4.765319,3.496199,0,0
93,O2t,3.166,GCCCATTGACGTTGTAAGCAGTGCCAGGTAATCTTGCACG,ATGGTGAACTTATAA,21.799493,6.885500,0,0
94,PDH,1.624,GCCCATTGATCACCTTACGCAGGGCGCTTAGAATTGCACG,ATGGTGATTAAATGA,9.282533,5.715845,0,0


#### Change Mutant


In [4]:
# Generating mutant
# The promoter mutations are defined by hand here instead of being generated automatically.
myHost = deepcopy(wtHost)
# Mutants to be generated
Mutant_Targets = ['PFK','PGK','TF']

for myMutant in Mutant_Targets:
    MTdf = myHost.strain.genes_df.copy()
    # Look the gene up by its index *label* so that the .loc accesses below stay
    # correct even if the gene table does not carry a default 0..n-1 index.
    Mutant_Labels = MTdf.index[MTdf['RctID'] == myMutant]
    if len(Mutant_Labels) != 1:
        raise ValueError('Expected exactly one gene with RctID {!r}, found {}'.format(myMutant, len(Mutant_Labels)))
    Mutant_Indx = Mutant_Labels[0]

    # Position of the unmutated promoter in the wild-type genome, for later printing
    RefProm = wtHost.strain.genes_df.loc[Mutant_Indx, 'Promoter']
    WTIndx = str(wtHost.strain.genome).find(RefProm)

    # Generating target promoter sequence, ATTGA is always there
    OldProm = MTdf.loc[Mutant_Indx, 'Promoter']
    PromTar = OldProm.replace('ATTGA','CCCCC')

    # converting Biopython Seq class to string. This makes string replacements easier
    MutGenome = str(myHost.strain.genome).replace(OldProm, PromTar)
    myHost.strain.genome = Seq(MutGenome)

    # Keep the gene table in sync with the mutated genome
    MTdf.loc[Mutant_Indx, 'Promoter'] = PromTar
    myHost.strain.genes_df = MTdf
    print(myMutant, WTIndx)

PFK 16
PGK 187
TF 5723


In [5]:
# Testing measure_EnzymeLevel1
# The call is unpacked into three values; only the first one (qw) is displayed below,
# the other two (we, er) are not used anywhere else in the visible notebook.
qw,we,er = measure_EnzymeLevel1('Ecol', wtHost.strain, myHost.strain)

In [6]:
# Display the first value returned by measure_EnzymeLevel1 for wild type vs. mutant.
qw

Unnamed: 0,RctFlag,RctID,RefExpr,NewExpr,RefFlux,Expr2Flux
0,False,PFK,1.061,1.061,7.477382,7.047485
1,False,PFL,1.466,1.466,0.000000,0.000000
2,False,PGI,2.015,2.015,4.860861,2.412338
3,False,PGK,0.929,0.929,-16.023526,-17.248144
4,False,PGL,2.607,2.607,4.959985,1.902564
...,...,...,...,...,...,...
91,False,NADTRHD,0.572,0.572,0.000000,0.000000
92,False,NH4t,1.787,1.787,4.765319,2.666659
93,False,O2t,2.055,2.055,21.799493,10.608026
94,False,PDH,1.758,1.758,9.282533,5.280166


In [8]:
# MTdf is the gene table left over from the last iteration of the mutant loop,
# i.e. the table that was assigned to myHost.strain.genes_df after the final mutation.
MTdf

Unnamed: 0,RctID,Expression,Promoter,ORF,Fluxes,Expr2Flux,TF_regulated,RegType
0,PFK,1.061,GCCCCCCCCTGTTTCATCGGTAAAAGAGTAGGGTTGCACG,ATGGGTGACTCGTAA,7.477382,7.047485,TF,0
1,PFL,1.466,GCCCATTGAATTACATATAAGAGCCATTTAGCCTTGCACG,ATGCAGTTCGTTTGA,0.000000,0.000000,0,0
2,PGI,2.015,GCCCATTGACGTTTGTGTTACGAACAAATACGGTTGCACG,ATGTTGCATATCTGA,4.860861,2.412338,TF,0
3,PGK,0.929,GCCCCCCCCCTAGCCATGCTACACCGAATAGGATTGCACG,ATGCTCACTTGGTGA,-16.023526,-17.248144,0,0
4,PGL,2.607,GCCCATTGAATGTAGCAACCGTTGCTGCTAACCTTGCACG,ATGAAACCTACGTGA,4.959985,1.902564,0,0
...,...,...,...,...,...,...,...,...
91,NADTRHD,0.572,GCCCATTGAGTGCGATGGGATTTGCACATAGGGTTGCACG,ATGGAGCTCAGGTAA,0.000000,0.000000,0,0
92,NH4t,1.787,GCCCATTGACTGGAAGGACTATCCAGATTAAGGTTGCACG,ATGGAGGTGATTTAA,4.765319,2.666659,0,0
93,O2t,2.055,GCCCATTGAAATTTGCTACGGACGAAAGTATCTTTGCACG,ATGCTGATCATGTAA,21.799493,10.608026,0,0
94,PDH,1.758,GCCCATTGAAGATACTGATTCCACACTTTAGACTTGCACG,ATGGATATGGCTTAA,9.282533,5.280166,0,0


In [None]:
# Finding reactions to change. Three possibilities exist: 1. the promoter of the enzyme itself has changed, 2. the promoter of a regulator has changed, 3. the enzyme promoter and the regulator promoter have changed
# redefining Help_FluxCalculator in simulation/metabolism
from typing import Optional

def Help_FluxCalculator ( HostName:str, StrainWT:Strain, StrainMut:Optional[Strain] = None ) :
    '''
    Calculation of flux values.

    This method can work in 2 modes:
      [StrainWT only] : The metabolic model of the WT strain is used for calculation.
      [StrainWT + StrainMut] : An additional step of resetting boundaries is done on the model
        before the fluxes are calculated.

    Parameters:
      HostName  : name of the host, forwarded to measure_EnzymeLevel1.
      StrainWT  : reference strain; its `model` is the one that gets optimized.
      StrainMut : optional mutant strain; when given, reaction bounds are reset first.

    Returns:
      A tuple (flux values, objective value) taken from the result of `model.optimize()`.

    TODO: It is still open whether the "reset boundary" step may leave a model modified.
      The bounds are written to `StrainWT.model` inside a `with` block; whether they persist
      after the block depends on the model's context-manager behaviour -- to be confirmed.
    TODO: Only case 1 (enzyme promoter changed) is handled so far. Changed regulator promoters
      (cases 2 and 3) need a `measure_RegulatorLevel` function that does not exist yet.
    '''
    # Reference mode: no mutant, so the WT model is optimized as it is.
    if StrainMut is None :
        Fluxes = StrainWT.model.optimize()
        return Fluxes.fluxes.values, Fluxes.objective_value

    # The relative import is correct once this function lives inside the package
    # (simulation/metabolism); when the cell is run from the notebook there is no
    # parent package, so fall back to the public biolabsim import.
    try:
        from ..measurement.fluxes import measure_EnzymeLevel1
    except ImportError:
        from biolabsim import measure_EnzymeLevel1

    # Setup of flux boundaries: reactions with an altered promoter sequence get a new
    # enzyme level, and their boundaries must be changed accordingly.
    print('resetting boundaries')
    # finding reactions for which the expression has changed, and finding the new level
    # NOTE(review): Set_Boundary was used below but never assigned. It is assumed here to be
    # the third return value of measure_EnzymeLevel1 (a mapping with 'lower' and 'upper'
    # lists of reaction indices) -- confirm against measure_EnzymeLevel1.
    RctNewDF, _, Set_Boundary = measure_EnzymeLevel1(HostName, StrainWT, StrainMut)

    # TODO: finding regulators whose promoter has changed, and finding the new level.
    # The draft call referenced an undefined function and its results were not used yet:
    #   RegNewDF, Regulator_Mutated, _ = measure_RegulatorLevel(HostName, StrainWT, StrainMut)

    # Defining the model with the two combinations of either
    # increasing lower bound (increased forward, decreased reverse reaction)
    # decreasing upper bound (decreased forward, increased reverse reaction)
    with StrainWT.model as myModel:
        # Comb.1: positive flux with increased expression -> increasing lower bound
        for Indx in Set_Boundary['lower']:
            myModel.reactions[Indx].lower_bound = RctNewDF.loc[Indx, 'NewExpr'] * RctNewDF.loc[Indx, 'Expr2Flux']
        # Comb.2: positive flux with decreased expression -> decreasing upper bound
        for Indx in Set_Boundary['upper']:
            myModel.reactions[Indx].upper_bound = RctNewDF.loc[Indx, 'NewExpr'] * RctNewDF.loc[Indx, 'Expr2Flux']

        Fluxes = myModel.optimize()

    return Fluxes.fluxes.values, Fluxes.objective_value

### Regulationsgleichung

In [None]:
# in Help_FluxCalculator:
# Fragment intended to replace the two bound-setting loops inside the
# `with StrainWT.model as myModel` block. It relies on myModel and Set_Boundary from that
# scope and on FluxEquation, so it cannot be executed as a stand-alone cell.
for Bound in ('lower', 'upper'):
    for Indx in Set_Boundary[Bound]:
        NewBound = FluxEquation(Indx, wtHost.strain.genes_df, myHost.strain.genes_df)
        setattr(myModel.reactions[Indx], Bound + '_bound', NewBound)


# (equationtype = myEquations[RctNewDF.loc[Indx, 'Expr2Flux']])


def FluxEquation(index, WTdf, MTdf):
    '''
    Return the value used as flux bound for one reaction.

    The equation depends on whether the gene is regulated by a transcription factor (TF):
      not regulated (TF_regulated == 0) : linear, mutant expression * Expr2Flux.
      regulated                         : Hill equation driven by the mutant TF expression.

    Parameters
    ----------
    index : row label of the reaction in both gene tables.
    WTdf  : wild-type gene table with the columns 'RctID', 'Expression', 'Expr2Flux',
            'TF_regulated' and 'RegType'.
    MTdf  : mutant gene table with the same row labels and an 'Expression' column.

    Returns
    -------
    float

    Raises
    ------
    ValueError
        If the regulator named in 'TF_regulated' is not found exactly once, or if its
        'RegType' is neither 'Hill_Activator' nor 'Hill_Repressor'.

    NOTE(review): the linear branch returns expression * Expr2Flux, whereas the Hill branch
    returns a value on the expression scale (it is not multiplied by Expr2Flux). Confirm
    which scale the caller expects for regulated genes.
    '''
    regulator_id = WTdf.loc[index, 'TF_regulated']

    # linear
    if regulator_id == 0:
        Corr_ExprFlux = WTdf.loc[index, 'Expr2Flux']  # WT or MT?
        # The gene tables carry the expression level in 'Expression'; they have no
        # 'NewExpr' column.
        return float(MTdf.loc[index, 'Expression'] * Corr_ExprFlux)

    # Hill
    regulator_rows = WTdf.index[WTdf['RctID'] == regulator_id]
    if len(regulator_rows) != 1:
        raise ValueError('Regulator {!r} must occur exactly once in the gene table, found {}'.format(regulator_id, len(regulator_rows)))
    RegIndx = regulator_rows[0]

    RegType = WTdf.loc[RegIndx, 'RegType']
    if RegType not in ('Hill_Activator', 'Hill_Repressor'):
        raise ValueError('Unknown RegType {!r} for regulator {!r}'.format(RegType, regulator_id))

    # Twice the mutant expression, so that the result equals the mutant expression when
    # the TF is at its physiological level (cTF == K).
    MaxExpr = 2 * float(MTdf.loc[index, 'Expression'])
    # TF level in the mutant; depends on a changed TF promoter.
    cTF = float(MTdf.loc[RegIndx, 'Expression'])
    # Physiological TF level: the wild-type expression of the regulator. It has to be the
    # regulator's (not the target's) level for the cTF == K property above to hold.
    K = float(WTdf.loc[RegIndx, 'Expression'])
    n = 1.95 # Kim, Harold D.; O'Shea, Erin K. (2008): A quantitative model of transcription factor–activated gene expression. DOI: 10.1038/nsmb.1500.

    # Fraction of the maximum reached by an activator. Without any TF the fraction is 0,
    # which is also the limit of the Hill term for cTF -> 0 (avoids a division by zero).
    if cTF <= 0:
        Activation = 0.0
    else:
        Activation = 1 / (1 + (K / cTF) ** n)

    if RegType == 'Hill_Activator':
        return MaxExpr * Activation
    # Hill_Repressor: complement of the activator curve
    return MaxExpr * (1 - Activation)

### Vergleich von WT und MT aus MetEngSim

In [None]:
# Gene table of the wild-type host, for comparison with the mutant table in the next cell.
wtHost.strain.genes_df

In [None]:
# Gene table of the mutant host (promoters of the targets in Mutant_Targets were changed).
myHost.strain.genes_df

In [None]:
# Per-gene ratio mutant / wild type of the 'Expression' column, rounded to two decimals.
mylist, mylist2 = (
    np.array(host.strain.genes_df['Expression']) for host in (wtHost, myHost)
)
print('Gene expression differences')
np.round(mylist2 / mylist, 2)

In [None]:
# Per-gene ratio mutant / wild type of the 'Expr2Flux' column, rounded to two decimals.
myl, myl2 = (
    np.array(host.strain.genes_df['Expr2Flux']) for host in (wtHost, myHost)
)
print('Exp2Flux diff')
np.round(myl2 / myl, 2)

In [None]:
# Per-reaction ratio mutant / wild type of the 'Fluxes' column.
FluxDiff = myHost.strain.genes_df['Fluxes'].div(wtHost.strain.genes_df['Fluxes'])
print('Flux reaction differences')
# Non-finite ratios (NaN, +inf, -inf) are shown as 1 before rounding to two decimals.
np.round(
    np.nan_to_num(FluxDiff, nan=1, posinf=1, neginf=1),
    2,
)