## Gene Knockouts
- Question 1: How do different genes control model growth?
- Question 2: How does gene count affect metabolite count?

In [79]:
import pandas
from time import time
from cobra.io import load_model
from cobra.flux_analysis import (
    single_gene_deletion, single_reaction_deletion, double_gene_deletion,
    double_reaction_deletion)
from pprint import pprint

cobra_model = load_model("textbook")
ecoli_model = load_model("iJO1366")

In [80]:
solution = cobra_model.optimize()

In [81]:
# inhibit flux
print('complete model: ', cobra_model.optimize())
with cobra_model:
    cobra_model.reactions.PFK.knock_out()
    print('pfk knocked out: ', solution)

complete model:  <Solution 0.874 at 0x15f2e68d0>
pfk knocked out:  <Solution 0.874 at 0x15f2e59d0>


In [82]:
print('complete model: ', cobra_model.optimize())
with cobra_model:
    cobra_model.genes.b1723.knock_out()
    print('pfkA knocked out: ', cobra_model.optimize())
    cobra_model.genes.b3916.knock_out()
    print('pfkB knocked out: ', cobra_model.optimize())

complete model:  <Solution 0.874 at 0x15f2e6f00>
pfkA knocked out:  <Solution 0.874 at 0x15f2e4a40>
pfkB knocked out:  <Solution 0.704 at 0x15f2e6ab0>


In [83]:
# single gene deletions
deletion_results = single_gene_deletion(cobra_model)

In [84]:
# subset of single gene deletions
single_gene_deletion(cobra_model, cobra_model.genes[:20])

Unnamed: 0,ids,growth,status
0,{b3736},0.37423,optimal
1,{b0727},0.858307,optimal
2,{b3735},0.37423,optimal
3,{b0356},0.873922,optimal
4,{b1241},0.873922,optimal
5,{b0118},0.873922,optimal
6,{b2587},0.873922,optimal
7,{b0726},0.858307,optimal
8,{b0116},0.782351,optimal
9,{b0474},0.873922,optimal


In [85]:
# subset of single reaction deletions
single_reaction_deletion(cobra_model, cobra_model.reactions[80:90])

Unnamed: 0,ids,growth,status
0,{SUCCt2_2},0.8739215,optimal
1,{PYRt2},0.8739215,optimal
2,{PPS},0.8739215,optimal
3,{PYK},0.864926,optimal
4,{SUCDi},0.8142975,optimal
5,{SUCCt3},0.8739215,optimal
6,{SUCOAS},0.8583074,optimal
7,{PTAr},0.8739215,optimal
8,{RPE},0.8657156,optimal
9,{RPI},2.579678e-16,optimal


In [86]:
# double gene deletions
double_gene_deletion(
    cobra_model, cobra_model.genes[-5:]).round(4)

Unnamed: 0,ids,growth,status
0,{b2464},0.8739,optimal
1,{b2935},0.8739,optimal
2,"{b2464, b0008}",0.8648,optimal
3,"{b2464, b2935}",0.8739,optimal
4,"{b2465, b0008}",0.8739,optimal
5,"{b2465, b2935}",-0.0,optimal
6,{b2465},0.8739,optimal
7,"{b2465, b3919}",0.704,optimal
8,"{b2935, b0008}",0.8739,optimal
9,{b3919},0.704,optimal


In [87]:
# double reaction deletions
double_reaction_deletion(
    cobra_model, cobra_model.reactions[2:7]).round(4)

Unnamed: 0,ids,growth,status
0,{ACONTb},-0.0,optimal
1,"{ACKr, ADK1}",0.8739,optimal
2,{ACKr},0.8739,optimal
3,"{ACONTa, ACt2r}",0.0,optimal
4,"{ACt2r, ACONTb}",-0.0,optimal
5,"{ADK1, ACONTb}",-0.0,optimal
6,{ACONTa},-0.0,optimal
7,"{ADK1, ACt2r}",0.8739,optimal
8,"{ACKr, ACONTa}",0.0,optimal
9,"{ACKr, ACONTb}",-0.0,optimal


In [88]:
# for double gene deletions
start = time()  # start timer()
double_gene_deletion(
    ecoli_model, ecoli_model.genes[:25], processes=2)
t1 = time() - start
print("Double gene deletions for 200 genes completed in "
      "%.2f sec with 2 cores" % t1)

start = time()  # start timer()
double_gene_deletion(
    ecoli_model, ecoli_model.genes[:25], processes=1)
t2 = time() - start
print("Double gene deletions for 200 genes completed in "
      "%.2f sec with 1 core" % t2)

print("Speedup of %.2fx" % (t2 / t1))

Double gene deletions for 200 genes completed in 3.91 sec with 2 cores
Double gene deletions for 200 genes completed in 3.66 sec with 1 core
Speedup of 0.94x


In [89]:
# for double reaction deletions
start = time()  # start timer()
double_reaction_deletion(
    ecoli_model, ecoli_model.reactions[:25], processes=2)
t1 = time() - start
print("Double reaction deletions for 200 reactions completed in "
      "%.2f sec with 2 cores" % t1)

start = time()  # start timer()
double_reaction_deletion(
    ecoli_model, ecoli_model.reactions[:25], processes=1)
t2 = time() - start
print("Double reaction deletions for 200 reactions completed in "
      "%.2f sec with 1 core" % t2)

print("Speedup of %.2fx" % (t2 / t1))

Double reaction deletions for 200 reactions completed in 3.22 sec with 2 cores
Double reaction deletions for 200 reactions completed in 1.95 sec with 1 core
Speedup of 0.61x


In [90]:
# deletion results

single = single_reaction_deletion(cobra_model)
double = double_reaction_deletion(cobra_model)

print(single.knockout["ATPM"])
print(double.knockout[{"ATPM", "TKT1"}])

       ids    growth   status
76  {ATPM}  0.916647  optimal
               ids   growth   status
1959  {TKT1, ATPM}  0.90584  optimal


In [91]:
atpm = cobra_model.reactions.ATPM
tkt1 = cobra_model.reactions.TKT1
pfk = cobra_model.reactions.PFK

print(single.knockout[atpm, tkt1, pfk])
print(double.knockout[{atpm, tkt1}, {atpm, pfk}, {atpm}])

       ids    growth   status
18  {TKT1}  0.864759  optimal
49   {PFK}  0.704037  optimal
76  {ATPM}  0.916647  optimal
               ids    growth   status
1959  {TKT1, ATPM}  0.905840  optimal
4215        {ATPM}  0.916647  optimal
4374   {ATPM, PFK}  0.704037  optimal


In [92]:
# Experiment #1: essentiality
# "b1377:

In [93]:
ecoli_model.genes

[<Gene b1377 at 0x1597ec230>,
 <Gene b2215 at 0x1597ec320>,
 <Gene b0929 at 0x1597ec440>,
 <Gene b0241 at 0x1597ec5f0>,
 <Gene b4034 at 0x1597ec7a0>,
 <Gene b4033 at 0x1597ec950>,
 <Gene b4032 at 0x1597ecb00>,
 <Gene b4035 at 0x1597eccb0>,
 <Gene b4036 at 0x1597ece60>,
 <Gene b4213 at 0x1597ed010>,
 <Gene b2835 at 0x1597ed1c0>,
 <Gene b2836 at 0x1597ed370>,
 <Gene b3553 at 0x1597ed520>,
 <Gene b1134 at 0x1597ed6d0>,
 <Gene b0446 at 0x1597ed880>,
 <Gene b1009 at 0x1597eda30>,
 <Gene b0954 at 0x1597edbe0>,
 <Gene b0180 at 0x1597edd90>,
 <Gene b0347 at 0x1597edf40>,
 <Gene b3580 at 0x1597ee0f0>,
 <Gene b1093 at 0x1597ee2a0>,
 <Gene b2323 at 0x1597ee450>,
 <Gene b1095 at 0x1597ee600>,
 <Gene b1397 at 0x1597ee7b0>,
 <Gene b1245 at 0x1597ee960>,
 <Gene b1244 at 0x1597eeb10>,
 <Gene b1329 at 0x1597eecc0>,
 <Gene b1246 at 0x1597eee70>,
 <Gene b1247 at 0x1597ef020>,
 <Gene b1386 at 0x1597ef1d0>,
 <Gene b0004 at 0x1597ef380>,
 <Gene b1192 at 0x1597ef530>,
 <Gene b1243 at 0x1597ef6e0>,
 <Gene b42

In [94]:
# gene = ecoli_model.genes.b1377
# reactions = gene.reactions

In [95]:
with ecoli_model:
    ecoli_model.genes.b1377.knock_out() # knocks out b1377, membrane transporter gene
    solution = ecoli_model.optimize() # calculates FBA for KO solution
    print(solution.objective_value) # growth value

# non-essential

0.9823718127269849


In [96]:
# Experiment 2: lethality
# Full genome knockout

In [97]:
all_genes = [g.id for g in ecoli_model.genes]
knockout = single_gene_deletion(ecoli_model, gene_list=all_genes)

In [98]:
knockout = single_gene_deletion(ecoli_model, gene_list=all_genes)

# This is a fix to get the gene's id as the index
knockout['ids'] = [list(i)[0] for i in knockout.ids]
knockout = knockout.set_index('ids')

# The output of the function single_gene_deletion is a dataframe
knockout.head()

Unnamed: 0_level_0,growth,status
ids,Unnamed: 1_level_1,Unnamed: 2_level_1
b1713,0.982372,optimal
b3826,0.982372,optimal
b0839,0.982372,optimal
b1012,0.982372,optimal
b3709,0.982372,optimal


In [99]:
for i in knockout:
    print(knockout[i])

ids
b1713    9.823718e-01
b3826    9.823718e-01
b0839    9.823718e-01
b1012    9.823718e-01
b3709    9.823718e-01
             ...     
b2883    9.823718e-01
b3792    9.823718e-01
b0934    9.823718e-01
b0693    9.823718e-01
b0386    4.621365e-17
Name: growth, Length: 1367, dtype: float64
ids
b1713    optimal
b3826    optimal
b0839    optimal
b1012    optimal
b3709    optimal
          ...   
b2883    optimal
b3792    optimal
b0934    optimal
b0693    optimal
b0386    optimal
Name: status, Length: 1367, dtype: object


In [100]:
threshold = 0.01 # determine if reduction in biomass flux is lethal; lethal if growth is less than threshold

In [101]:
insilico_lethals = list(knockout.index[knockout.growth < threshold]) # look at the point where the growth is less than threshold
insilico_non_lethals = list(knockout.index[knockout.growth > threshold])

In [102]:
print("in-silico lethals:", len(insilico_lethals))
print("in-silico non lethals:", len(insilico_non_lethals)
print("in-silico non lethals:", len(insilico_non_lethals))

SyntaxError: '(' was never closed (561615649.py, line 2)

## Experiment 3: growth rates
- Research question: What genes are associated with glucose uptake and regulation in e. coli?
- Compare multiple gene knockouts to determine and how they change growth rates
## Genes associated w/glucose (from EcoCyc)
- Crp: b3357, reacts with cAMP and forms CRP-cyclic-AMP, DNA-binding transcriptional dual regulator (regulates the expression of over 180 genes)
- ptsA:        b3947, triphosphoryl transport protein, ptsA encodes a protein with similarity to sugar phosphotransferase system (PTS) components
- ptsG:        b1101, glucose-specific phosphotransferase system (PTS)
- ptsH:        b2415, sugar transporter, phosphoenolpyruvate:sugar phosphotransferase system (PTSsugar)
- ptsI:        b2416, sugar phosphotransferase system (PTSsugar)
- ptsN:        b3204, phosphoenolpyruvate (PEP)-dependent carbohydrate phosphotransferase system (PTS)
- ptsP:        b2829, nitrogen phosphotransferase system (PTS Ntr), responds to nitrogen availability (supposedly)
- dhaM (ptsD): b1198, glycerol degradation (limited EcoCyc info)
- fruA (ptsF)  b2167, inner membrane, helps fruA use fructose as a carbon source
- manX (ptsL)  b1817, phosphorylation of 2-deoxyglucose, mannose permease complex
- manY (ptsP)  b1818, forms the transmembrane channel of the E. coli mannose permease complex works with manZ
- manZ (ptsM)  b1819, forms the transmembrane channel of the E. coli mannose permease complex, works with manY
- nagE (ptsN): b0679, part of superfamily of the phosphoenolpyruvate (PEP)-dependent, sugar transporting phosphotransferase system (PTSsugar)
- npr (ptsO):  b3206, second phosphotransfer protein of the nitrogen phosphoenolpyruvate (PEP)-dependent phosphotransferase system (PTSNtr)

## Related observations
- sugar phosphotransferase system (PTSsugar): ptsH, ptsI, nagE, ptsG
- nitrogen phosphotransferase system (PTS Ntr): ptsP, npr?
- phosphoenolpyruvate (PEP)-dependent carbohydrate phosphotransferase system: ptsN
- ManXYZ mannose permease complex: manX, manY, manZ

- PTSsugar genes are close in proximity
- ManXYZ mannose permease complex genes are close in proximity

In [34]:
PTS_sugars = ['b2415', 'b2416', 'b0679', 'b1101']
print(PTS_sugars, end=' ')

['b2415', 'b2416', 'b0679', 'b1101'] 

In [47]:
ecoli_model.genes.index("b2415") # are ptsI and ptsH linked or on the same chromosome?
                                 # index = 65

65

In [46]:
ecoli_model.genes.index("b2416") # are ptsI and ptsH linked or on the same chromosome?
                                 # index = 65

65

In [48]:
ecoli_model.genes.index("b0679")
                                 # index = 66

66

In [49]:
ecoli_model.genes.index("b1101")
                                 # index = 67

67

In [53]:
print('complete model: ', ecoli_model.optimize())

complete model:  <Solution 0.982 at 0x138983380>


In [54]:
print('complete model: ', ecoli_model.optimize())
with ecoli_model:
    ecoli_model.genes.b2415.knock_out()
    print('ptsH knocked out: ', ecoli_model.optimize())
    ecoli_model.genes.b2416.knock_out()
    print('ptsI knocked out: ', ecoli_model.optimize())
    ecoli_model.genes.b0679.knock_out()
    print('nagE knocked out: ', ecoli_model.optimize())
    ecoli_model.genes.b1101.knock_out()
    print('ptsG knocked out: ', ecoli_model.optimize())

complete model:  <Solution 0.982 at 0x138983440>
ptsH knocked out:  <Solution 0.972 at 0x138982ab0>
ptsI knocked out:  <Solution 0.972 at 0x138983590>
nagE knocked out:  <Solution 0.972 at 0x138983da0>
ptsG knocked out:  <Solution 0.972 at 0x138983dd0>


In [103]:
single_gene_deletion(ecoli_model, ecoli_model.genes[65:67]) # nagE and ptsH

Unnamed: 0,ids,growth,status
0,{b0679},0.982372,optimal
1,{b2415},0.972003,optimal


In [136]:
def get_gene_index(g):
    """takes a gene id as a parameter and returns gene index"""
    index = ecoli_model.genes.index(g)
    return index


In [137]:
def get_reaction_id(g):
    """takes a gene id as a parameter and returns the reaction at the same index"""
    gene = ecoli_model.genes.get_by_id(g)
    print(gene)
    index = get_gene_index(gene)
    reaction_id = ecoli_model.reactions[index].id
    return reaction_id

In [138]:
get_gene_index('b0679')

66

In [139]:
get_reaction_id('b0679')

b0679


'EX_ascb__L_e'