# Deep mutational scanning and machine learning uncover antimicrobial peptide features driving membrane selectivity

## Part 2 - Translate the peptide sequences and compute changes in the residues

In [7]:
import pandas as pd
from Bio.Seq import Seq

In [10]:
# Load  count matrix 
counts = pd.read_table("/stor/work/Wilke/luiz/Protegrin-1_Slay_and_ML/data/counts/count_matrix.txt", sep="\s+", engine="python", header=None)

# rename columns
# counts.rename(columns={0:"Sequence", 1:'S01_0uM_IPTG_1', 2:'S02_0uM_IPTG_2', 3:'S03_0uM_IPTG_3',
#        4:'S04_1uM_IPTG_1', 5:'S05_1uM_IPTG_2', 6:'S06_1uM_IPTG_3', 7:'S07_10uM_IPTG_1',
#        8:'S08_10uM_IPTG_2', 9:'S09_10uM_IPTG_3', 10:'S10_100uM_IPTG_1',
#        11:'S11_100uM_IPTG_2', 12:'S12_100uM_IPTG_3'}, inplace=True)

counts

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,AACGGTGGGCGTCTTTGCTACTGTCGTCGCAGGTTCTGCGTTTGCG...,138,178.0,138.0,165.0,181.0,151.0,320.0,300.0,381.0,202.0,96.0,71.0
1,AACGGTGGGCGTCTTTGTTGCTGTCATCGCATGTTCTGCTTTTGTG...,222,261.0,231.0,314.0,331.0,240.0,485.0,573.0,570.0,1259.0,649.0,468.0
2,AACGGTGGGCGTTTTTGCTACTGCCGTCGCAGGTTCTGTGTTTGTG...,31,35.0,23.0,31.0,28.0,34.0,53.0,62.0,72.0,31.0,12.0,8.0
3,AACGGTGGGGGTCTTTGTTATTGCCTTCGCCAGTTCTACGTTTGCG...,67,68.0,63.0,68.0,73.0,72.0,121.0,145.0,143.0,263.0,149.0,114.0
4,AATGGTGGGCGTCTTTGTTACTGTCGTCCCCGGTTCTGTGTTTGTG...,31,40.0,29.0,48.0,45.0,34.0,89.0,78.0,95.0,137.0,86.0,44.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
14864,TGTGGTGGGCGTCTTTGTTACTGCCGTCGTAGGTTCTGCGTATGCG...,337,399.0,380.0,422.0,447.0,437.0,778.0,783.0,868.0,533.0,260.0,202.0
14865,TGTGGTGTGCGTCGTAGTTACTGTCGTAGCAGGTTCTGCGTTTGCG...,46,84.0,67.0,98.0,86.0,77.0,135.0,148.0,155.0,275.0,150.0,111.0
14866,TTCGGTGGCCGTCTTAGCTATTGTCGTCGCAGGTTCTGTCTTTGCG...,1092,1475.0,1299.0,1520.0,1665.0,1474.0,2807.0,3071.0,3268.0,3284.0,1604.0,1179.0
14867,TTCGGTGGGCGTCTTTGCTATTATCATAGCAGGTTCACCGTTTGTG...,16,25.0,47.0,33.0,42.0,40.0,78.0,51.0,69.0,107.0,79.0,54.0


### Translate nucleotide sequence

In [9]:
# Translate nucleotide sequence
counts['peptide'] = ["".join(Seq(sq).translate(to_stop=True)) for sq in counts["Sequence"]]
counts



Unnamed: 0,Sequence,S01_0uM_IPTG_1,S02_0uM_IPTG_2,S03_0uM_IPTG_3,S04_1uM_IPTG_1,S05_1uM_IPTG_2,S06_1uM_IPTG_3,S07_10uM_IPTG_1,S08_10uM_IPTG_2,S09_10uM_IPTG_3,S10_100uM_IPTG_1,S11_100uM_IPTG_2,S12_100uM_IPTG_3,peptide
0,AACGGTGGGCGTCTTTGCTACTGTCGTCGCAGGTTCTGCGTTTGCG...,138,178.0,138.0,165.0,181.0,151.0,320.0,300.0,381.0,202.0,96.0,71.0,NGGRLCYCRRRFCVCGGR
1,AACGGTGGGCGTCTTTGTTGCTGTCATCGCATGTTCTGCTTTTGTG...,222,261.0,231.0,314.0,331.0,240.0,485.0,573.0,570.0,1259.0,649.0,468.0,NGGRLCCCHRMFCFCVGR
2,AACGGTGGGCGTTTTTGCTACTGCCGTCGCAGGTTCTGTGTTTGTG...,31,35.0,23.0,31.0,28.0,34.0,53.0,62.0,72.0,31.0,12.0,8.0,NGGRFCYCRRRFCVCVGR
3,AACGGTGGGGGTCTTTGTTATTGCCTTCGCCAGTTCTACGTTTGCG...,67,68.0,63.0,68.0,73.0,72.0,121.0,145.0,143.0,263.0,149.0,114.0,NGGGLCYCLRQFYVCVGR
4,AATGGTGGGCGTCTTTGTTACTGTCGTCCCCGGTTCTGTGTTTGTG...,31,40.0,29.0,48.0,45.0,34.0,89.0,78.0,95.0,137.0,86.0,44.0,NGGRLCYCRPRFCVCVGP
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14864,TGTGGTGGGCGTCTTTGTTACTGCCGTCGTAGGTTCTGCGTATGCG...,337,399.0,380.0,422.0,447.0,437.0,778.0,783.0,868.0,533.0,260.0,202.0,CGGRLCYCRRRFCVCVGR
14865,TGTGGTGTGCGTCGTAGTTACTGTCGTAGCAGGTTCTGCGTTTGCG...,46,84.0,67.0,98.0,86.0,77.0,135.0,148.0,155.0,275.0,150.0,111.0,CGVRRSYCRSRFCVCVGR
14866,TTCGGTGGCCGTCTTAGCTATTGTCGTCGCAGGTTCTGTCTTTGCG...,1092,1475.0,1299.0,1520.0,1665.0,1474.0,2807.0,3071.0,3268.0,3284.0,1604.0,1179.0,FGGRLSYCRRRFCLCGGR
14867,TTCGGTGGGCGTCTTTGCTATTATCATAGCAGGTTCACCGTTTGTG...,16,25.0,47.0,33.0,42.0,40.0,78.0,51.0,69.0,107.0,79.0,54.0,FGGRLCYYHSRFTVCVGH


### Stacking read counts
This stacked dataframe will be used to compute diferencial expression analysis for each peptide.

In [4]:
# Stacking (sum) all the read counts that translate the same peptide
cols = ['S01_0uM_IPTG_1', 'S02_0uM_IPTG_2', 'S03_0uM_IPTG_3',
       'S04_1uM_IPTG_1', 'S05_1uM_IPTG_2', 'S06_1uM_IPTG_3', 'S07_10uM_IPTG_1',
       'S08_10uM_IPTG_2', 'S09_10uM_IPTG_3', 'S10_100uM_IPTG_1',
       'S11_100uM_IPTG_2', 'S12_100uM_IPTG_3']

df_stacked = counts.groupby(["peptide"], as_index=False)[cols].sum().copy()

df_stacked

Unnamed: 0,peptide,S01_0uM_IPTG_1,S02_0uM_IPTG_2,S03_0uM_IPTG_3,S04_1uM_IPTG_1,S05_1uM_IPTG_2,S06_1uM_IPTG_3,S07_10uM_IPTG_1,S08_10uM_IPTG_2,S09_10uM_IPTG_3,S10_100uM_IPTG_1,S11_100uM_IPTG_2,S12_100uM_IPTG_3
0,,2042,2673.0,2430.0,2873.0,3093.0,2614.0,4801.0,5656.0,5990.0,12997.0,6094.0,5096.0
1,AGGRLCYCRRRFCVCVGR,173,196.0,193.0,242.0,213.0,236.0,409.0,412.0,459.0,240.0,145.0,71.0
2,AVGVFAIAVADSVFAQDVKSTCRH,300,418.0,309.0,379.0,384.0,354.0,665.0,711.0,775.0,2007.0,1072.0,711.0
3,AVGVFVIAVAGSAFVYDVKSTCRH,208,271.0,211.0,260.0,329.0,281.0,488.0,445.0,520.0,1406.0,710.0,523.0
4,AVPVFVIAVAGSVFA,158,212.0,192.0,261.0,274.0,236.0,368.0,445.0,461.0,1041.0,526.0,454.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7795,YGGLLCYCRRRSCVCVGR,448,577.0,481.0,592.0,626.0,580.0,1082.0,1174.0,1165.0,970.0,589.0,370.0
7796,YGGRLCYCGRRLCVCVGR,56,71.0,81.0,67.0,83.0,76.0,129.0,139.0,151.0,361.0,211.0,130.0
7797,YGGRLCYCHHRICVCVGR,319,435.0,393.0,438.0,508.0,424.0,885.0,846.0,992.0,884.0,450.0,330.0
7798,YGGRLCYCRRRFCVSV,149,208.0,172.0,191.0,183.0,189.0,316.0,383.0,350.0,525.0,270.0,223.0


In [5]:
# adding pep length column
df_stacked.insert(2, 'len_pep', df_stacked.peptide.str.len())
df_stacked = df_stacked.query('len_pep == 18').copy()
df_stacked

Unnamed: 0,peptide,S01_0uM_IPTG_1,len_pep,S02_0uM_IPTG_2,S03_0uM_IPTG_3,S04_1uM_IPTG_1,S05_1uM_IPTG_2,S06_1uM_IPTG_3,S07_10uM_IPTG_1,S08_10uM_IPTG_2,S09_10uM_IPTG_3,S10_100uM_IPTG_1,S11_100uM_IPTG_2,S12_100uM_IPTG_3
1,AGGRLCYCRRRFCVCVGR,173,18,196.0,193.0,242.0,213.0,236.0,409.0,412.0,459.0,240.0,145.0,71.0
6,CCGRLCYCRRRSCVCVGR,20,18,32.0,32.0,36.0,35.0,25.0,51.0,71.0,76.0,69.0,36.0,22.0
7,CDGRLCYCRRRFCVCVGR,33,18,69.0,35.0,47.0,44.0,51.0,95.0,101.0,110.0,113.0,61.0,51.0
8,CDGRLCYCRSGLSVSVGH,101,18,126.0,136.0,141.0,139.0,145.0,330.0,288.0,295.0,405.0,223.0,204.0
9,CGARFCYSRRRFCVCVGR,163,18,207.0,194.0,191.0,242.0,195.0,373.0,388.0,400.0,559.0,263.0,218.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7794,YGGHLCSCRRRLCVTVGR,331,18,466.0,408.0,438.0,500.0,430.0,789.0,892.0,937.0,1276.0,596.0,501.0
7795,YGGLLCYCRRRSCVCVGR,448,18,577.0,481.0,592.0,626.0,580.0,1082.0,1174.0,1165.0,970.0,589.0,370.0
7796,YGGRLCYCGRRLCVCVGR,56,18,71.0,81.0,67.0,83.0,76.0,129.0,139.0,151.0,361.0,211.0,130.0
7797,YGGRLCYCHHRICVCVGR,319,18,435.0,393.0,438.0,508.0,424.0,885.0,846.0,992.0,884.0,450.0,330.0


In [6]:
# creating columns with mean df_stacked for each group
df_stacked["Mean_0IPTG_reads"] = ((df_stacked['S01_0uM_IPTG_1'] + df_stacked['S02_0uM_IPTG_2'] + df_stacked['S03_0uM_IPTG_3']) /3).round()
df_stacked["Mean_1IPTG_reads"] = ((df_stacked['S04_1uM_IPTG_1'] + df_stacked['S05_1uM_IPTG_2'] + df_stacked['S06_1uM_IPTG_3']) /3).round()
df_stacked["Mean_10IPTG_reads"] = ((df_stacked['S07_10uM_IPTG_1'] + df_stacked['S08_10uM_IPTG_2'] + df_stacked['S09_10uM_IPTG_3']) /3).round()
df_stacked["Mean_100IPTG_reads"] = ((df_stacked['S10_100uM_IPTG_1'] + df_stacked['S11_100uM_IPTG_2'] + df_stacked['S12_100uM_IPTG_3']) /3).round()

df_stacked

Unnamed: 0,peptide,S01_0uM_IPTG_1,len_pep,S02_0uM_IPTG_2,S03_0uM_IPTG_3,S04_1uM_IPTG_1,S05_1uM_IPTG_2,S06_1uM_IPTG_3,S07_10uM_IPTG_1,S08_10uM_IPTG_2,S09_10uM_IPTG_3,S10_100uM_IPTG_1,S11_100uM_IPTG_2,S12_100uM_IPTG_3,Mean_0IPTG_reads,Mean_1IPTG_reads,Mean_10IPTG_reads,Mean_100IPTG_reads
1,AGGRLCYCRRRFCVCVGR,173,18,196.0,193.0,242.0,213.0,236.0,409.0,412.0,459.0,240.0,145.0,71.0,187.0,230.0,427.0,152.0
6,CCGRLCYCRRRSCVCVGR,20,18,32.0,32.0,36.0,35.0,25.0,51.0,71.0,76.0,69.0,36.0,22.0,28.0,32.0,66.0,42.0
7,CDGRLCYCRRRFCVCVGR,33,18,69.0,35.0,47.0,44.0,51.0,95.0,101.0,110.0,113.0,61.0,51.0,46.0,47.0,102.0,75.0
8,CDGRLCYCRSGLSVSVGH,101,18,126.0,136.0,141.0,139.0,145.0,330.0,288.0,295.0,405.0,223.0,204.0,121.0,142.0,304.0,277.0
9,CGARFCYSRRRFCVCVGR,163,18,207.0,194.0,191.0,242.0,195.0,373.0,388.0,400.0,559.0,263.0,218.0,188.0,209.0,387.0,347.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7794,YGGHLCSCRRRLCVTVGR,331,18,466.0,408.0,438.0,500.0,430.0,789.0,892.0,937.0,1276.0,596.0,501.0,402.0,456.0,873.0,791.0
7795,YGGLLCYCRRRSCVCVGR,448,18,577.0,481.0,592.0,626.0,580.0,1082.0,1174.0,1165.0,970.0,589.0,370.0,502.0,599.0,1140.0,643.0
7796,YGGRLCYCGRRLCVCVGR,56,18,71.0,81.0,67.0,83.0,76.0,129.0,139.0,151.0,361.0,211.0,130.0,69.0,75.0,140.0,234.0
7797,YGGRLCYCHHRICVCVGR,319,18,435.0,393.0,438.0,508.0,424.0,885.0,846.0,992.0,884.0,450.0,330.0,382.0,457.0,908.0,555.0


### Creating functions to compare peptides

In [7]:
# def to find the differences between the residues
idx = "RRRRICYCPLRFYVCVGR"

def diff_func(idxx):
    pg1 = "RGGRLCYCRRRFCVCVGR"
    idx = idxx
    diff = []
    for i in range(0, len(pg1)):
        aa = idx[i]
        pg_aa = pg1[i]
        
        if pg_aa == aa:
            diff.append("-")
        else:
            diff.append(aa)
    diff_seq = "".join(diff)
    return diff_seq


diff_func(idx)

'-RR-I---PL--Y-----'

In [8]:
# function to add the number of changes in each peptide
def changesFunc(idxx):
    pg1 = "RGGRLCYCRRRFCVCVGR"
    idx = idxx
    changes = 0
    for i in range(0, len(pg1)):
        aa = idx[i]
        pg_aa = pg1[i]
        
        if pg_aa != aa:
            changes+=1
    return changes

changesFunc(idx)

6

### Applying changes into the new data frame

In [9]:
# applying functions
df_stacked["diff_in_seq"] = [diff_func(x) for x in df_stacked["peptide"]]
df_stacked["changes"] = [changesFunc(x) for x in df_stacked["peptide"]]
df_stacked

Unnamed: 0,peptide,Sequence,len_pep,S01_0uM_IPTG_1,S02_0uM_IPTG_2,S03_0uM_IPTG_3,S04_1uM_IPTG_1,S05_1uM_IPTG_2,S06_1uM_IPTG_3,S07_10uM_IPTG_1,...,S09_10uM_IPTG_3,S10_100uM_IPTG_1,S11_100uM_IPTG_2,S12_100uM_IPTG_3,Mean_0IPTG_reads,Mean_1IPTG_reads,Mean_10IPTG_reads,Mean_100IPTG_reads,diff_in_seq,changes
1,AGGRLCYCRRRFCVCVGR,GCCGGTGGGCGTCTTTGTTACTGTCGTCGCAGATTCTGTGTTTGTG...,18,173,196.0,193.0,242.0,213.0,236.0,409.0,...,459.0,240.0,145.0,71.0,187.0,230.0,427.0,152.0,A-----------------,1
6,CCGRLCYCRRRSCVCVGR,TGCTGTGGGCGTCTTTGCTACTGCCGTCGCAGGTCCTGTGTTTGTG...,18,20,32.0,32.0,36.0,35.0,25.0,51.0,...,76.0,69.0,36.0,22.0,28.0,32.0,66.0,42.0,CC---------S------,3
7,CDGRLCYCRRRFCVCVGR,TGCGATGGGCGTCTTTGTTACTGCCGTCGCAGGTTCTGCGTTTGTG...,18,33,69.0,35.0,47.0,44.0,51.0,95.0,...,110.0,113.0,61.0,51.0,46.0,47.0,102.0,75.0,CD----------------,2
8,CDGRLCYCRSGLSVSVGH,TGCGATGGGCGTCTTTGCTATTGCCGTAGCGGGCTCAGTGTTAGTG...,18,101,126.0,136.0,141.0,139.0,145.0,330.0,...,295.0,405.0,223.0,204.0,121.0,142.0,304.0,277.0,CD-------SGLS-S--H,8
9,CGARFCYSRRRFCVCVGR,TGCGGTGCGCGTTTTTGCTACAGCCGTCGGAGGTTCTGCGTTTGCG...,18,163,207.0,194.0,191.0,242.0,195.0,373.0,...,400.0,559.0,263.0,218.0,188.0,209.0,387.0,347.0,C-A-F--S----------,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7794,YGGHLCSCRRRLCVTVGR,TACGGTGGACATCTTTGCTCCTGCCGTCGCAGGCTATGCGTTACCG...,18,331,466.0,408.0,438.0,500.0,430.0,789.0,...,937.0,1276.0,596.0,501.0,402.0,456.0,873.0,791.0,Y--H--S----L--T---,5
7795,YGGLLCYCRRRSCVCVGR,TACGGTGGGCTTCTTTGTTATTGCCGTCGCAGGTCCTGTGTTTGTG...,18,448,577.0,481.0,592.0,626.0,580.0,1082.0,...,1165.0,970.0,589.0,370.0,502.0,599.0,1140.0,643.0,Y--L-------S------,3
7796,YGGRLCYCGRRLCVCVGR,TACGGTGGGCGTCTCTGTTATTGTGGTCGCAGGTTGTGCGTTTGTG...,18,56,71.0,81.0,67.0,83.0,76.0,129.0,...,151.0,361.0,211.0,130.0,69.0,75.0,140.0,234.0,Y-------G--L------,3
7797,YGGRLCYCHHRICVCVGR,TACGGTGGGCGTCTTTGTTATTGTCATCACAGGATCTGCGTTTGCG...,18,319,435.0,393.0,438.0,508.0,424.0,885.0,...,992.0,884.0,450.0,330.0,382.0,457.0,908.0,555.0,Y-------HH-I------,4


In [10]:
# Merging dataframes
new_diff = df_stacked["diff_in_seq"].str.split("", n= -1, expand = True)
new_df_stacked = pd.merge(df_stacked, new_diff, left_index=True, right_index=True)
new_df_stacked.reset_index(drop=False, inplace=True)

# rename columns positions
new_cols = {'index':'ID', 0:'P0', 1:'P1', 2:'P2', 3:'P3', 4:'P4', 5:'P5', 6:'P6', 7:'P7', 8:'P8', 9:'P9', 10:'P10', 11:'P11', 12:'P12', 13:'P13', 14:'P14', 15:'P15', 16:'P16', 17:'P17', 18:'P18', 19:'P19'}
new_df_stacked.rename(columns=new_cols, inplace=True)
new_df_stacked

Unnamed: 0,ID,peptide,Sequence,len_pep,S01_0uM_IPTG_1,S02_0uM_IPTG_2,S03_0uM_IPTG_3,S04_1uM_IPTG_1,S05_1uM_IPTG_2,S06_1uM_IPTG_3,...,P10,P11,P12,P13,P14,P15,P16,P17,P18,P19
0,1,AGGRLCYCRRRFCVCVGR,GCCGGTGGGCGTCTTTGTTACTGTCGTCGCAGATTCTGTGTTTGTG...,18,173,196.0,193.0,242.0,213.0,236.0,...,-,-,-,-,-,-,-,-,-,
1,6,CCGRLCYCRRRSCVCVGR,TGCTGTGGGCGTCTTTGCTACTGCCGTCGCAGGTCCTGTGTTTGTG...,18,20,32.0,32.0,36.0,35.0,25.0,...,-,-,S,-,-,-,-,-,-,
2,7,CDGRLCYCRRRFCVCVGR,TGCGATGGGCGTCTTTGTTACTGCCGTCGCAGGTTCTGCGTTTGTG...,18,33,69.0,35.0,47.0,44.0,51.0,...,-,-,-,-,-,-,-,-,-,
3,8,CDGRLCYCRSGLSVSVGH,TGCGATGGGCGTCTTTGCTATTGCCGTAGCGGGCTCAGTGTTAGTG...,18,101,126.0,136.0,141.0,139.0,145.0,...,S,G,L,S,-,S,-,-,H,
4,9,CGARFCYSRRRFCVCVGR,TGCGGTGCGCGTTTTTGCTACAGCCGTCGGAGGTTCTGCGTTTGCG...,18,163,207.0,194.0,191.0,242.0,195.0,...,-,-,-,-,-,-,-,-,-,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7099,7794,YGGHLCSCRRRLCVTVGR,TACGGTGGACATCTTTGCTCCTGCCGTCGCAGGCTATGCGTTACCG...,18,331,466.0,408.0,438.0,500.0,430.0,...,-,-,L,-,-,T,-,-,-,
7100,7795,YGGLLCYCRRRSCVCVGR,TACGGTGGGCTTCTTTGTTATTGCCGTCGCAGGTCCTGTGTTTGTG...,18,448,577.0,481.0,592.0,626.0,580.0,...,-,-,S,-,-,-,-,-,-,
7101,7796,YGGRLCYCGRRLCVCVGR,TACGGTGGGCGTCTCTGTTATTGTGGTCGCAGGTTGTGCGTTTGTG...,18,56,71.0,81.0,67.0,83.0,76.0,...,-,-,L,-,-,-,-,-,-,
7102,7797,YGGRLCYCHHRICVCVGR,TACGGTGGGCGTCTTTGTTATTGTCATCACAGGATCTGCGTTTGCG...,18,319,435.0,393.0,438.0,508.0,424.0,...,H,-,I,-,-,-,-,-,-,


In [13]:
# Filtring just the columns of interest
cols = ['ID', 'peptide', 'Sequence', 'len_pep', 'diff_in_seq', 'changes',"P1","P2","P3","P4","P5","P6","P7","P8","P9","P10","P11","P12","P13","P14","P15","P16","P17","P18",
        'Mean_0IPTG_reads','Mean_1IPTG_reads', 'Mean_10IPTG_reads', 'Mean_100IPTG_reads', 
        'S01_0uM_IPTG_1', 'S02_0uM_IPTG_2', 'S03_0uM_IPTG_3',
        'S04_1uM_IPTG_1', 'S05_1uM_IPTG_2', 'S06_1uM_IPTG_3',
        'S07_10uM_IPTG_1', 'S08_10uM_IPTG_2','S09_10uM_IPTG_3',
        'S10_100uM_IPTG_1', 'S11_100uM_IPTG_2','S12_100uM_IPTG_3'
        ]

new_df_stacked = new_df_stacked[cols]
new_df_stacked

Unnamed: 0,ID,peptide,Sequence,len_pep,diff_in_seq,changes,P1,P2,P3,P4,...,S03_0uM_IPTG_3,S04_1uM_IPTG_1,S05_1uM_IPTG_2,S06_1uM_IPTG_3,S07_10uM_IPTG_1,S08_10uM_IPTG_2,S09_10uM_IPTG_3,S10_100uM_IPTG_1,S11_100uM_IPTG_2,S12_100uM_IPTG_3
0,1,AGGRLCYCRRRFCVCVGR,GCCGGTGGGCGTCTTTGTTACTGTCGTCGCAGATTCTGTGTTTGTG...,18,A-----------------,1,A,-,-,-,...,193.0,242.0,213.0,236.0,409.0,412.0,459.0,240.0,145.0,71.0
1,6,CCGRLCYCRRRSCVCVGR,TGCTGTGGGCGTCTTTGCTACTGCCGTCGCAGGTCCTGTGTTTGTG...,18,CC---------S------,3,C,C,-,-,...,32.0,36.0,35.0,25.0,51.0,71.0,76.0,69.0,36.0,22.0
2,7,CDGRLCYCRRRFCVCVGR,TGCGATGGGCGTCTTTGTTACTGCCGTCGCAGGTTCTGCGTTTGTG...,18,CD----------------,2,C,D,-,-,...,35.0,47.0,44.0,51.0,95.0,101.0,110.0,113.0,61.0,51.0
3,8,CDGRLCYCRSGLSVSVGH,TGCGATGGGCGTCTTTGCTATTGCCGTAGCGGGCTCAGTGTTAGTG...,18,CD-------SGLS-S--H,8,C,D,-,-,...,136.0,141.0,139.0,145.0,330.0,288.0,295.0,405.0,223.0,204.0
4,9,CGARFCYSRRRFCVCVGR,TGCGGTGCGCGTTTTTGCTACAGCCGTCGGAGGTTCTGCGTTTGCG...,18,C-A-F--S----------,4,C,-,A,-,...,194.0,191.0,242.0,195.0,373.0,388.0,400.0,559.0,263.0,218.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7099,7794,YGGHLCSCRRRLCVTVGR,TACGGTGGACATCTTTGCTCCTGCCGTCGCAGGCTATGCGTTACCG...,18,Y--H--S----L--T---,5,Y,-,-,H,...,408.0,438.0,500.0,430.0,789.0,892.0,937.0,1276.0,596.0,501.0
7100,7795,YGGLLCYCRRRSCVCVGR,TACGGTGGGCTTCTTTGTTATTGCCGTCGCAGGTCCTGTGTTTGTG...,18,Y--L-------S------,3,Y,-,-,L,...,481.0,592.0,626.0,580.0,1082.0,1174.0,1165.0,970.0,589.0,370.0
7101,7796,YGGRLCYCGRRLCVCVGR,TACGGTGGGCGTCTCTGTTATTGTGGTCGCAGGTTGTGCGTTTGTG...,18,Y-------G--L------,3,Y,-,-,-,...,81.0,67.0,83.0,76.0,129.0,139.0,151.0,361.0,211.0,130.0
7102,7797,YGGRLCYCHHRICVCVGR,TACGGTGGGCGTCTTTGTTATTGTCATCACAGGATCTGCGTTTGCG...,18,Y-------HH-I------,4,Y,-,-,-,...,393.0,438.0,508.0,424.0,885.0,846.0,992.0,884.0,450.0,330.0


### Saving results

In [14]:
#new_df_stacked.to_csv("results/counts_matrix_stacked.csv", header=True, index=False)

### The End