### Setup aus MetEngSim

In [None]:
import copy # deep copies, so that a mutant host never aliases the wild-type host
import sys # loading commands to control/navigate within the system architecture
from typing import Optional # type hints used in function signatures
# Loading pandas, a library for data manipulation
from os.path import join
# import xlrd
import pandas as pd
# import lxml

# Loading numpy, a library fo manipulation of numbers
import numpy as np

# loading matplotlib, a library for visualization
import matplotlib.pyplot as plt
%matplotlib inline

# loading cobrapy, a library dedicated to the analysis of genome scale metabolic models
# from cobra.io import read_sbml_model, write_sbml_model, load_matlab_model

# loading escher for metabolic network visualization
# import escher
# from escher import Builder
# from time import sleep
# escher.rc['never_ask_before_quit'] = True
# list of available maps
# print(escher.list_available_maps())

# loading Memote, quality assessment of GSMM
# from memote import test_model, snapshot_report

from biolabsim import Host, Strain, Ecol
from biolabsim import measure_EnzymeLevel1, Help_GenomeGenerator
from Bio.Seq import Seq

print('System ready')

### Ändern des Dataframes
#### Hinzufügen des Transkriptionsfaktors

In [None]:
def add_TranscriptionFactor(df):
    '''
    Return a copy of a gene table extended by a transcription factor (TF).

    The input frame is not modified. The result contains
      - one row with RctID 'TF' (appended only if no such row exists yet, so
        calling the function twice does not create a duplicate regulator),
      - a column 'TF_regulated' holding 0 (not regulated) or the RctID of the
        regulating TF; currently the rows 'PFK' and 'PGI' are set to 'TF',
      - a column 'RegType' holding 0 or the regulation type; only the TF row
        carries a type ('Hill_Activator'), enzymes keep 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Gene table with at least the columns 'RctID', 'Expression',
        'Promoter', 'ORF', 'Fluxes' and 'Expr2Flux'.

    Returns
    -------
    pandas.DataFrame
        New frame with a fresh 0..n-1 index when the TF row was appended.
    '''
    if (df['RctID'] == 'TF').any():
        newdf = df.copy()
    else:
        # Placeholder values for the TF gene; all of them are still to be
        # replaced by real, random or computed values.
        tf_row = pd.DataFrame({
            'RctID': ['TF'],
            'Expression': [2],  # candidate: Help_PromoterStrength()
            'Promoter': ['GCCCATTGACCATACCGGAAGGAGATAATAAAGTTGCACG'],
            'ORF': ['ATGGAGATGAAGTAA'],
            'Fluxes': [9],  # candidate: Help_FluxCalculator()
            'Expr2Flux': [4.5],  # candidate: Help_Expr2Flux()
        })
        # DataFrame.append was removed in pandas 2.0; concat is the supported equivalent.
        newdf = pd.concat([df, tf_row], ignore_index=True)

    # Both columns mix the integer 0 with string labels, so they are created
    # with object dtype instead of writing strings into an int column.
    newdf = newdf.assign(
        TF_regulated=np.zeros(len(newdf), dtype=object),
        RegType=np.zeros(len(newdf), dtype=object),
    )

    # The TF itself is not self-regulated; it only carries the regulation type.
    # Future: derive activator/repressor from the substrates that are present.
    newdf.loc[newdf['RctID'] == 'TF', 'RegType'] = 'Hill_Activator'
    # For now every regulated enzyme is regulated by the single TF; the
    # regulation type is irrelevant for the enzymes themselves.
    newdf.loc[newdf['RctID'].isin(['PFK', 'PGI']), 'TF_regulated'] = 'TF'
    return newdf

In [None]:
# Create the wild-type host and print the objective value of its strain
wtHost = Ecol()
print('Wild type growth rate: {:.2f}'.format(wtHost.strain.objective))

# adding regulator TF to dataframe and genome of wt
WTdf = wtHost.strain.genes_df
WTdf = add_TranscriptionFactor(WTdf)
# write the extended table (TF row plus TF_regulated/RegType columns) back to the strain
wtHost.strain.genes_df = WTdf
# Generating new Genome
# NOTE(review): the meaning of the arguments 500 and .6 is defined by
# Help_GenomeGenerator in biolabsim and is not visible here -- confirm there.
WTGenome_new = Help_GenomeGenerator(WTdf, 500, .6)
# the genome is stored as a Biopython Seq object
wtHost.strain.genome = Seq(WTGenome_new)
# last expression of the cell: display the extended gene table
wtHost.strain.genes_df


In [None]:
# Generating mutant
# We will actually define the mutations by hand and delete the automatically generated genome for now
# A deep copy keeps the wild type untouched. With a plain `myHost = wtHost` both
# names refer to the same host object, so every mutation would also change the
# reference and all WT/mutant comparisons would be trivially identical.
myHost = copy.deepcopy(wtHost)
# Mutants to be generated
Mutant_Targets = ['PFK','PGK','TF']

# The reference genome as plain string; it is only read inside the loop
WTGenome_str = str(wtHost.strain.genome)

for myMutant in Mutant_Targets:
    Mutant_Bool = myHost.strain.genes_df['RctID']==myMutant
    if not Mutant_Bool.any():
        # target is not part of the gene table, nothing to mutate
        print(myMutant, 'not found in genes_df, skipped')
        continue
    # Promoter of the (first) matching gene as a plain string; str.find and
    # str.replace below need a string, not a pandas selection
    PromWT = myHost.strain.genes_df.loc[Mutant_Bool, 'Promoter'].iloc[0]
    # Finding the index for later printing (-1 if the promoter is not in the WT genome)
    WTIndx = WTGenome_str.find(PromWT)
    # Generating target promoter sequence, ATTGA is always there
    PromTar = PromWT.replace('ATTGA','CCCCC')
    # converting Biopython Seq class to string. This makes string replacements easier
    MutGenome = str(myHost.strain.genome).replace(PromWT, PromTar)
    myHost.strain.genome = Seq(MutGenome)
    print(myMutant, WTIndx)

In [None]:
# introducing promoter mutations for PFK and for the regulator TF
def _mutate_promoter_box(host, ref_genome, genes, rct_id, new_box, old_box='ATTGA'):
    '''
    Replace `old_box` by `new_box` in the promoter of one gene inside the host genome.

    The gene is looked up by its RctID instead of a hard-coded row label, so the
    cell keeps working when the gene table changes in size or order.

    Parameters: host (its strain.genome is replaced by the mutated genome),
    ref_genome (reference genome as str, only used for locating and printing),
    genes (gene table with 'RctID' and 'Promoter'), rct_id, new_box, old_box.
    Returns: (mutated promoter sequence, index of the promoter in ref_genome or -1).
    Raises: KeyError if no row with the given RctID exists.
    '''
    hits = genes.loc[genes['RctID'] == rct_id, 'Promoter']
    if hits.empty:
        raise KeyError('No gene with RctID {!r} in the gene table'.format(rct_id))
    prom_ref = hits.iloc[0]
    # Generating target promoter sequence, ATTGA is always there
    prom_tar = prom_ref.replace(old_box, new_box)
    # Finding the index for later printing
    indx = ref_genome.find(prom_ref)
    # converting Biopython Seq class to string. This makes string replacements easier
    host.strain.genome = Seq(str(host.strain.genome).replace(prom_ref, prom_tar))
    if indx < 0:
        print('Promoter of {} not found in reference genome'.format(rct_id))
    else:
        # slice exactly the promoter length instead of a fixed width
        stop = indx + len(prom_ref)
        print('Reference {0}:\t{1},\nMutated {0}:\t{2}'.format(rct_id, ref_genome[indx:stop], host.strain.genome[indx:stop]))
    print('Index of {} in genome'.format(rct_id), indx)
    return prom_tar, indx


# 1. changing the dataframe information
MTdf = WTdf.copy()
# Snapshot of the reference genome taken before any mutation, so the printed
# reference stays the original sequence even if both hosts share one object
RefGenome = str(wtHost.strain.genome)
PFK_PromTar, MutIndx = _mutate_promoter_box(myHost, RefGenome, MTdf, 'PFK', 'CCCCC')
TF_PromTar, MutIndx = _mutate_promoter_box(myHost, RefGenome, MTdf, 'TF', 'TTTTT')
MutGenome = str(myHost.strain.genome)


print('Mutation growth rate: {:.2f}'.format(myHost.strain.objective))
# adding regulator TF to dataframe and genome of the mutant,
# but only if the table does not contain a TF row yet (avoids a duplicate regulator)
MTdf = myHost.strain.genes_df
if not (MTdf['RctID'] == 'TF').any():
    MTdf = add_TranscriptionFactor(MTdf)
# Generating new Genome
# NOTE(review): the regenerated genome is assigned to wtHost, not myHost, and is
# built from the unmutated promoters in MTdf -- confirm that replacing the
# wild-type genome here is intended.
WTGenome_new = Help_GenomeGenerator(MTdf, 500, .6)
wtHost.strain.genome = Seq(WTGenome_new)

In [None]:
# Position of the TF promoter in the wild-type genome (-1 if it is absent).
# The row is selected by RctID instead of the hard-coded row label 95.
str(wtHost.strain.genome).find(WTdf.loc[WTdf['RctID']=='TF', 'Promoter'].iloc[0])
# print(WTdf.loc[95, 'Promoter'])
# wtHost.strain.genome

In [None]:
# Testing measure_EnzymeLevel1 on the wild-type strain versus the mutant strain.
# The call is unpacked into three values; only qw is displayed afterwards.
# NOTE(review): qw/we/er are placeholder names -- what each returned value holds
# is defined in biolabsim and not visible here; confirm and rename accordingly.
qw,we,er = measure_EnzymeLevel1('Ecol', wtHost.strain, myHost.strain)

In [None]:
# Display the first value returned by measure_EnzymeLevel1
qw

In [None]:
# Finding reactions to change. Three possibilities exist: 1. the promoter of the enzyme itself has changed, 2. the promoter of a regulator has changed, 3. the enzyme promoter and the regulator promoter have changed
# redefining Help_FluxCalculator in simulation/metabolism
def Help_FluxCalculator ( HostName:str, StrainWT:Strain, StrainMut:Optional[Strain] = None ) :
    '''
    Calculation of flux values.

    This method can work in 2 modes:
      [StrainWT only] : The metabolic model of the WT strain is used for calculation.
      [StrainWT + StrainMut] : An additional step of resetting boundaries is done on the model
        before the fluxes are calculated.

    Parameters:
      HostName : name of the host, passed through to the measurement functions.
      StrainWT : reference strain; its `model` is optimized.
      StrainMut : optional mutant strain; when given, bounds are reset first.

    Returns:
      Tuple (flux values of the solution, objective value of the solution).

    TODO: The "reset boundary" step is mutating the `StrainMut.model`, mutation might not be intended.
      Because of this, perhaps the other `StrainWT.model` gets mutated in the process.
      (The bounds are currently set on `StrainWT.model` inside a `with` block.)
    '''
    # measure_EnzymeLevel1 is taken from module scope (imported from biolabsim in
    # the setup cell). The former function-level relative import
    # `from ..measurement.fluxes import measure_EnzymeLevel1` cannot be resolved
    # in a notebook and failed on every call, even in WT-only mode. Restore it
    # when this function is moved into the package module.

    if StrainMut is None :
        # WT-only mode: optimize the reference model as it is
        Fluxes = StrainWT.model.optimize()
        return Fluxes.fluxes.values, Fluxes.objective_value

    # adding flux values
    # setup of flux boundaries. For the reference boundary changes are set to 'False',
    # for mutant strains, reactions with altered promoter sequence will change enzyme levels and boundaries must be changed accordingly, their variable is 'True'
    print('resetting boundaries')
    # finding reactions for which the expression has changed, and finding the new level
    RctNewDF, EnzymeProm_Mutated, _ = measure_EnzymeLevel1(HostName, StrainWT, StrainMut)
    # finding regulators whose promoter has changed, and finding the new level
    # (argument names corrected: HostNane -> HostName, StrainWt -> StrainWT)
    # NOTE(review): measure_RegulatorLevel is neither defined nor imported in this
    # notebook, and its first result overwrites the RctNewDF of the enzyme step --
    # confirm the intended source of the function and how both tables are combined.
    RctNewDF, Regulator_Mutated, _ = measure_RegulatorLevel(HostName, StrainWT, StrainMut)

    # NOTE(review): Set_Boundary is not assigned anywhere in this function; it
    # presumably has to come from one of the measurement results -- TODO confirm.
    # Defining the model with the two combinations of either
    # increasing lower bound (increased forward, decreased reverse reaction)
    # decreasing upper bound (decreased forward, increased reverse reaction)
    with StrainWT.model as myModel:
        # Comb.1: positive flux with increased expression -> increasing lower bound
        for Indx in Set_Boundary['lower']:
            myModel.reactions[Indx].lower_bound = RctNewDF.loc[Indx, 'NewExpr'] * RctNewDF.loc[Indx, 'Expr2Flux']
        # Comb.2: positive flux with decreased expression -> decreasing upper bound
        for Indx in Set_Boundary['upper']:
            myModel.reactions[Indx].upper_bound = RctNewDF.loc[Indx, 'NewExpr'] * RctNewDF.loc[Indx, 'Expr2Flux']

        Fluxes = myModel.optimize()

    return Fluxes.fluxes.values, Fluxes.objective_value

In [None]:
# Optional, but nice to know: randomly decide whether all genes are regulated by the TF or not

# import random

# rand_onoff = random.randint(0,1)
    
# newdf['TF_regulated'] = newdf['TF_regulated'].apply(lambda x: x * 0 + onoff)

# newdf

### Regulationsgleichung

In [None]:
# Sketch of the change intended inside Help_FluxCalculator, kept as comments on purpose:
# Set_Boundary and myModel are not defined at notebook level and FluxEquation is
# only defined further down in this cell, so executing these loops here raised a
# NameError and prevented FluxEquation from being defined.
#
# for Indx in Set_Boundary['lower']:
#     myModel.reactions[Indx].lower_bound = FluxEquation(Indx, wtHost.strain.genes_df, myHost.strain.genes_df)
# for Indx in Set_Boundary['upper']:
#     myModel.reactions[Indx].upper_bound = FluxEquation(Indx, wtHost.strain.genes_df, myHost.strain.genes_df)
#
# (equationtype = myEquations[RctNewDF.loc[Indx, 'Expr2Flux']])


def FluxEquation(index, WTdf, MTdf):
    '''
    Return the bound value for one reaction, using a linear or a Hill equation
    depending on whether the protein is regulated by a transcription factor.

    Parameters
    ----------
    index : row label of the reaction in both tables.
    WTdf : wild-type gene table with 'RctID', 'Expression', 'Expr2Flux',
        'TF_regulated' (0 or RctID of the regulator) and 'RegType'.
    MTdf : mutant gene table with the same row labels; needs 'NewExpr' for
        unregulated reactions and 'Expression' for regulated ones.

    Returns
    -------
    float-like scalar.

    Raises
    ------
    KeyError : the regulator named in 'TF_regulated' has no row in WTdf.
    ValueError : the regulator's 'RegType' is neither 'Hill_Activator' nor
        'Hill_Repressor'.
    '''
    myTF = WTdf.loc[index, 'TF_regulated']

    # linear: not regulated, new expression level times expression-to-flux factor
    if myTF == 0:
        Corr_ExprFlux = WTdf.loc[index, 'Expr2Flux'] # WT or MT
        return MTdf.loc[index, 'NewExpr'] * Corr_ExprFlux

    # Hill: locate the regulator row by its RctID
    RegIndx = (WTdf['RctID'] == myTF)
    if not RegIndx.any():
        raise KeyError('Regulator {!r} not found in column RctID'.format(myTF))
    RegType = WTdf.loc[RegIndx, 'RegType'].iloc[0]
    if RegType not in ('Hill_Activator', 'Hill_Repressor'):
        raise ValueError('Unknown regulation type {!r} for regulator {!r}'.format(RegType, myTF))

    # .loc with a single row label yields a scalar, a boolean mask yields a
    # Series; both are reduced to plain floats here.
    MaxExpr = 2 * float(MTdf.loc[index, 'Expression']) # at the physiological cTF value the result equals the mutant expression
    cTF = float(MTdf.loc[RegIndx, 'Expression'].iloc[0]) # depends on the altered TF promoter
    # NOTE(review): the original comment describes K as the physiological value of
    # cTF, but the value read is the wild-type expression of the regulated gene
    # itself (row `index`), not of the regulator -- confirm which one is intended.
    K = float(WTdf.loc[index, 'Expression'])
    n = 1.95 # Kim, Harold D.; O'Shea, Erin K. (2008): A quantitative model of transcription factor-activated gene expression. DOI: 10.1038/nsmb.1500.
    # NOTE(review): unlike the linear branch, the Hill result is not multiplied by
    # 'Expr2Flux' -- confirm whether an expression level or a flux is expected.

    # Fraction of the maximum reached by an activator; without any TF (cTF == 0)
    # the limit of the Hill term is 0, which also avoids a division by zero.
    Activation = 0.0 if cTF == 0 else 1 / (1 + (K / cTF) ** n)

    if RegType == 'Hill_Activator':
        return MaxExpr * Activation
    return MaxExpr * (1 - Activation)

### Vergleich von WT und MT aus MetEngSim

In [None]:
# Display the gene table of the wild-type host
wtHost.strain.genes_df

In [None]:
# Display the gene table of the mutant host
myHost.strain.genes_df

In [None]:
# Ratio mutant / wild type of the 'Expression' column, rounded to two decimals
mylist, mylist2 = (
    np.array(host.strain.genes_df['Expression']) for host in (wtHost, myHost)
)
print('Gene expression differences')
np.divide(mylist2, mylist).round(2)

In [None]:
# Ratio mutant / wild type of the 'Expr2Flux' column, rounded to two decimals
myl, myl2 = (
    np.array(host.strain.genes_df['Expr2Flux']) for host in (wtHost, myHost)
)
print('Exp2Flux diff')
np.divide(myl2, myl).round(2)

In [None]:
# Ratio mutant / wild type of the 'Fluxes' column
FluxDiff = myHost.strain.genes_df['Fluxes'].div(wtHost.strain.genes_df['Fluxes'])
print('Flux reaction differences')
# NaN and +/-inf ratios (0/0 or x/0) are reported as 1, then rounded to two decimals
FluxClean = np.nan_to_num(FluxDiff, nan=1, posinf=1, neginf=1)
np.round(FluxClean, 2)