In [8]:
import json
import pandas as pd
import matplotlib.pyplot as plt

In [9]:
# Target property windows, one dict per design, mapping a property name to a
# [lower, upper] pair. The numbers here are fractions of each property's
# min-max range; the cell that defines `min_max` converts them in place to
# raw property values.
# Each window is its own list object (the shared `net_charge` window is
# deliberately written out twice) because the rescaling step mutates the
# lists in place.
sphere_sample = dict(
    hydrophobic_moment=[0.6, 1.00],
    net_charge=[0.4, 0.6],
)

fiber_sample = dict(
    has_beta_sheet_content=[0.1, 1],
    net_charge=[0.4, 0.6],
)

# Order matters only for display: spheres first, fibers second.
samples = [sphere_sample, fiber_sample]

In [10]:
# (min, max) of each property, used to map a normalized fraction f back to a
# raw value via  raw = f * (max - min) + min.
# NOTE(review): presumably these are the ranges used when the data was
# normalized -- confirm the source of these numbers.
min_max = {
    "has_beta_sheet_content": (0.0, 1.0),
    "hydrophobic_moment": (0.0, 1.998),
    "net_charge": (-6.0, 6.0),
    "ap": (0.959986, 2.897030),
}

# Convert every [lower, upper] window in `samples` from fractions to raw
# values. The window lists are overwritten IN PLACE, so `sphere_sample` and
# `fiber_sample` hold raw values afterwards.
# WARNING: this is not idempotent -- running this cell a second time without
# first re-running the cell that defines the samples rescales the already
# rescaled windows.
for sample in samples:
    for feature, window in sample.items():
        # 'length' and 'sequence' entries are passed through unscaled.
        if feature in ['length', 'sequence']:
            continue
        lower, upper = min_max[feature]
        span = upper - lower
        window[0] = window[0] * span + lower
        window[1] = window[1] * span + lower

samples

[{'hydrophobic_moment': [1.1987999999999999, 1.998],
  'net_charge': [-1.1999999999999993, 1.1999999999999993]},
 {'has_beta_sheet_content': [0.1, 1.0],
  'net_charge': [-1.1999999999999993, 1.1999999999999993]}]

In [11]:
# Load the per-peptide structural metrics for the generated fiber candidates.
#  - has_beta_sheet_content: 1 when beta_sheet_fraction exceeds 0.1, else 0
#  - peptide_id is dropped
#  - only the first row of each distinct sequence is kept
fiber_samples = (
    pd.read_csv('gen_peptides/fibers_metrics.csv')
    .assign(
        has_beta_sheet_content=lambda frame: (frame['beta_sheet_fraction'] > 0.1).astype(int)
    )
    .drop(columns=['peptide_id'])
    .drop_duplicates(subset=['sequence'])
)
fiber_samples

Unnamed: 0,sequence,beta_sheet_fraction,extension_ratio,hydrophobic_moment,planarity_index,curvature_score,radius_of_gyration,alternating_pattern_score,net_charge,aromatic_interaction_score,has_beta_sheet_content
0,CIYFSICLL,0.0,0.401278,0.747778,1.622293,88.754031,4.473587,0.9085,0,1,0
1,VLCFIWN,0.0,0.450900,0.718571,1.378926,79.605695,4.366486,0.5275,0,0,0
2,FCYNPEFC,0.0,0.374525,0.227500,1.239957,78.318447,4.810864,0.9250,-1,1,0
3,LVAWVFQIW,0.0,0.401715,0.797778,1.571811,87.797882,4.494652,0.5710,0,0,0
4,VYPFLMPW,0.0,0.387029,0.660000,1.399808,85.441300,4.173064,0.1300,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
3789,IHLQCQIEW,0.0,0.280102,0.231111,0.756566,61.163858,5.681388,1.6940,-1,0,0
3790,YAYQPDCWL,0.0,0.452786,0.185556,1.525226,75.207558,5.101702,0.4780,-1,1,0
3791,FPIVYSPC,0.0,0.246856,0.532500,1.936038,72.342199,4.063038,0.4100,0,0,0
3792,VFCYCFPLW,0.0,0.532961,0.698889,1.401320,74.393543,5.442312,0.4070,0,0,0


In [12]:
# Keep only the fiber candidates whose value for every constrained property
# lies inside the (already rescaled) window from `fiber_sample`.
# Both window ends are inclusive.
valid_fibers = fiber_samples.copy()

for column, (lower, upper) in fiber_sample.items():
    in_window = valid_fibers[column].between(lower, upper)
    valid_fibers = valid_fibers[in_window]

valid_fibers

Unnamed: 0,sequence,beta_sheet_fraction,extension_ratio,hydrophobic_moment,planarity_index,curvature_score,radius_of_gyration,alternating_pattern_score,net_charge,aromatic_interaction_score,has_beta_sheet_content
179,QCVSPHWPAP,0.5,0.14091,0.173,1.390015,71.191524,4.944672,0.366,0,0,1
456,AIVFPGTFHQ,0.5,0.126765,0.476,0.863145,59.594237,5.372047,0.404,0,1,1
572,CQCTAGCYVW,0.5,0.150142,0.322,1.274791,61.67153,5.226244,0.384,0,0,1
621,QYWCPGMAIC,0.5,0.132561,0.404,0.865241,64.124233,5.293483,0.032,0,1,1
624,PAPFPWSIWI,0.5,0.112988,0.637,0.808338,57.768717,5.383118,0.878,0,0,1
732,PFAIPYCSTP,0.5,0.169935,0.387,1.480606,66.983398,5.002869,0.334,0,0,1
971,LICWTGHCHH,0.5,0.124979,0.306,0.917254,57.007593,5.352198,0.412,0,0,1
1072,LDIYNHPFF,0.571429,0.15416,0.346667,1.06924,65.518282,4.779394,0.5565,-1,2,1
1296,RCVQGGHPFC,0.5,0.140804,0.015,1.069499,65.82944,5.256677,0.102,1,0,1
1445,AFLWNGISYQ,0.5,0.129396,0.399,0.865597,62.762412,5.325236,0.218,0,1,1


In [13]:
# Load the aggregation-propensity table for the fiber candidates. The file is
# read without a header row, so the four columns are named here.
# The `ap` column is then mapped onto the min_max['ap'] range:
#   ap_raw = ap_min + ap * (ap_max - ap_min)
# NOTE(review): assumes the file stores `ap` as a normalized fraction -- confirm.
peptide_propensities_fibers = (
    pd.read_csv(
        './gen_peptides/filtered_ap_peptides_fiber.txt',
        names=['sequence', 'sequence_3', 'ap', 'assembly'],
    )
    .assign(
        ap=lambda frame: min_max['ap'][0] + (frame['ap'] * (min_max['ap'][1] - min_max['ap'][0]))
    )
)
peptide_propensities_fibers

Unnamed: 0,sequence,sequence_3,ap,assembly
0,ALWYVFPE,Ala-Leu-Trp-Tyr-Val-Phe-Pro-Glu,1.880959,0.997710
1,AVRADYVYC,Ala-Val-Arg-Ala-Asp-Tyr-Val-Tyr-Cys,1.870947,0.998530
2,VPVFLPEAIF,Val-Pro-Val-Phe-Leu-Pro-Glu-Ala-Ile-Phe,1.958868,0.999817
3,PVFDGTALHC,Pro-Val-Phe-Asp-Gly-Thr-Ala-Leu-His-Cys,1.795246,0.977304
4,KGFSYEQYC,Lys-Gly-Phe-Ser-Tyr-Glu-Gln-Tyr-Cys,1.835776,0.991674
...,...,...,...,...
3789,RVCNYVAFW,Arg-Val-Cys-Asn-Tyr-Val-Ala-Phe-Trp,1.941425,0.999832
3790,LGCFQKY,Leu-Gly-Cys-Phe-Gln-Lys-Tyr,1.866403,0.986392
3791,KYFEAGFH,Lys-Tyr-Phe-Glu-Ala-Gly-Phe-His,1.866125,0.997916
3792,NASSLQPWVW,Asn-Ala-Ser-Ser-Leu-Gln-Pro-Trp-Val-Trp,1.869704,0.998846


In [15]:
# Attach the aggregation-propensity columns to the fibers that passed the
# property filter; sequences missing from either table are dropped (inner join).
merged_valid_fibers = pd.merge(
    valid_fibers,
    peptide_propensities_fibers,
    on='sequence',
    how='inner',
)

# Show the 15 candidates with the highest `ap`.
(
    merged_valid_fibers
    .sort_values(by='ap', ascending=False)
    .loc[:, ['sequence', 'sequence_3', 'ap']]
    .head(15)
)

Unnamed: 0,sequence,sequence_3,ap
20,VFLVPVDFKY,Val-Phe-Leu-Val-Pro-Val-Asp-Phe-Lys-Tyr,2.0787
4,PAPFPWSIWI,Pro-Ala-Pro-Phe-Pro-Trp-Ser-Ile-Trp-Ile,2.056355
14,ILSCPGWPFY,Ile-Leu-Ser-Cys-Pro-Gly-Trp-Pro-Phe-Tyr,2.025387
5,PFAIPYCSTP,Pro-Phe-Ala-Ile-Pro-Tyr-Cys-Ser-Thr-Pro,2.024292
22,FTFIPGWVDL,Phe-Thr-Phe-Ile-Pro-Gly-Trp-Val-Asp-Leu,1.984914
21,ILSFPDKVTY,Ile-Leu-Ser-Phe-Pro-Asp-Lys-Val-Thr-Tyr,1.953139
26,HYVSGRFLIY,His-Tyr-Val-Ser-Gly-Arg-Phe-Leu-Ile-Tyr,1.94058
13,CVSAPGIFHC,Cys-Val-Ser-Ala-Pro-Gly-Ile-Phe-His-Cys,1.933979
24,PVFCADQWFC,Pro-Val-Phe-Cys-Ala-Asp-Gln-Trp-Phe-Cys,1.926734
10,PVWRADSWHC,Pro-Val-Trp-Arg-Ala-Asp-Ser-Trp-His-Cys,1.926473


In [16]:
# Load the per-peptide structural metrics for the generated sphere candidates,
# drop peptide_id, and keep only the first row of each distinct sequence.
sphere_samples = (
    pd.read_csv('gen_peptides/spheres_metrics.csv')
    .drop(columns=['peptide_id'])
    .drop_duplicates(subset=['sequence'])
)
sphere_samples

Unnamed: 0,sequence,beta_sheet_fraction,extension_ratio,hydrophobic_moment,planarity_index,curvature_score,radius_of_gyration,alternating_pattern_score,net_charge,aromatic_interaction_score
0,LLYPGYI,0.0,0.221467,0.660000,1.612789,76.109668,3.567114,0.315000,0,1
1,FFIMM,0.0,0.398617,1.008000,1.391486,87.024219,3.097588,0.155000,0,1
2,PWLCE,0.0,0.398615,0.308000,1.391539,87.020234,3.097735,0.403333,-1,0
3,VFAMY,0.0,0.398600,0.758000,1.391667,87.023910,3.097622,0.261667,0,1
4,LYEPIV,0.0,0.454657,0.526667,1.607155,82.084048,3.542254,0.080000,-1,0
...,...,...,...,...,...,...,...,...,...,...
3128,WMDWYW,0.0,0.428394,0.405000,1.591521,85.612568,3.369054,0.696667,-1,4
3129,IFAYFL,0.0,0.439416,0.950000,1.454165,88.743689,3.376977,0.226667,0,2
3130,FWMNL,0.0,0.383015,0.584000,1.429012,87.688685,3.055087,0.948333,0,1
3131,IRESFV,0.0,0.431861,0.033333,1.588575,86.058732,3.392151,1.153333,0,0


In [17]:
# Keep only the sphere candidates whose value for every constrained property
# lies inside the (already rescaled) window from `sphere_sample`.
# Both window ends are inclusive.
valid_spheres = sphere_samples.copy()

for column, (lower, upper) in sphere_sample.items():
    in_window = valid_spheres[column].between(lower, upper)
    valid_spheres = valid_spheres[in_window]

valid_spheres

Unnamed: 0,sequence,beta_sheet_fraction,extension_ratio,hydrophobic_moment,planarity_index,curvature_score,radius_of_gyration,alternating_pattern_score,net_charge,aromatic_interaction_score
20,IFFFII,0.0,0.454611,1.285000,1.451293,87.059164,3.445264,0.063333,0,2
111,IFLFIFV,0.0,0.437745,1.210000,1.481132,87.039522,3.791915,0.035000,0,0
152,FFIFFI,0.0,0.452924,1.253333,1.399537,89.456775,3.392132,0.000000,0,5
281,FLFIII,0.0,0.451873,1.263333,1.434598,89.172317,3.418710,0.020000,0,1
302,IWIFIII,0.0,0.606355,1.271429,0.611429,69.515868,5.312401,0.253333,0,0
...,...,...,...,...,...,...,...,...,...,...
3027,LIIIIVW,0.0,0.722890,1.210000,0.643926,65.360247,6.056578,0.122500,0,0
3057,IFFFILV,0.0,0.436297,1.210000,1.526939,86.764081,3.753611,0.110833,0,2
3067,WIIIIV,0.0,0.524352,1.235000,0.829570,77.672989,4.336758,0.090000,0,0
3113,FFIIVI,0.0,0.693778,1.266667,0.707117,64.319177,5.109040,0.100000,0,1


In [29]:
# Load the aggregation-propensity table for the sphere candidates. The file is
# read without a header row, so the four columns are named here.
# The `ap` column is then mapped onto the min_max['ap'] range:
#   ap_raw = ap_min + ap * (ap_max - ap_min)
# NOTE(review): assumes the file stores `ap` as a normalized fraction -- confirm.
peptide_propensities_spheres = (
    pd.read_csv(
        './gen_peptides/filtered_ap_peptides_spheres.txt',
        names=['sequence', 'sequence_3', 'ap', 'assembly'],
    )
    .assign(
        ap=lambda frame: min_max['ap'][0] + (frame['ap'] * (min_max['ap'][1] - min_max['ap'][0]))
    )
)
peptide_propensities_spheres

Unnamed: 0,sequence,sequence_3,ap,assembly
0,FVCIT,Phe-Val-Cys-Ile-Thr,2.196335,0.999973
1,CYTEW,Cys-Tyr-Thr-Glu-Trp,1.929523,0.999701
2,QLYQVL,Gln-Leu-Tyr-Gln-Val-Leu,1.879860,0.983154
3,IPLMV,Ile-Pro-Leu-Met-Val,2.053032,0.998857
4,WAIFF,Trp-Ala-Ile-Phe-Phe,2.256845,1.000000
...,...,...,...,...
3981,FCII,Phe-Cys-Ile-Ile,2.148993,0.999893
3982,FWAQMIV,Phe-Trp-Ala-Gln-Met-Ile-Val,2.022415,0.999676
3983,FMFIDW,Phe-Met-Phe-Ile-Asp-Trp,2.156174,0.999998
3984,LWVLFIF,Leu-Trp-Val-Leu-Phe-Ile-Phe,2.185822,0.999999


In [34]:
# Attach the aggregation-propensity columns to the spheres that passed the
# property filter; sequences missing from either table are dropped (inner join).
merged_valid_spheres = pd.merge(
    valid_spheres,
    peptide_propensities_spheres,
    on='sequence',
    how='inner',
)

# Show the 15 candidates with the highest `ap`.
(
    merged_valid_spheres
    .sort_values(by='ap', ascending=False)
    .loc[:, ['sequence', 'sequence_3', 'ap']]
    .head(15)
)

Unnamed: 0,sequence,sequence_3,ap
10,FVIFF,Phe-Val-Ile-Phe-Phe,2.300558
34,FIVFF,Phe-Ile-Val-Phe-Phe,2.290336
9,LVIIF,Leu-Val-Ile-Ile-Phe,2.280685
47,FIFVF,Phe-Ile-Phe-Val-Phe,2.271703
58,FIFIF,Phe-Ile-Phe-Ile-Phe,2.264918
40,FFLFI,Phe-Phe-Leu-Phe-Ile,2.263951
28,LIIVF,Leu-Ile-Ile-Val-Phe,2.258977
8,FLIFFI,Phe-Leu-Ile-Phe-Phe-Ile,2.249841
13,FFIFI,Phe-Phe-Ile-Phe-Ile,2.245837
57,VFFIF,Val-Phe-Phe-Ile-Phe,2.245384


In [39]:
# Write the filtered, AP-annotated candidates to CSV in the working directory
# (spheres first, then fibers), without the DataFrame index column.
for frame, destination in (
    (merged_valid_spheres, 'valid_spheres.csv'),
    (merged_valid_fibers, 'valid_fibers.csv'),
):
    frame.to_csv(destination, index=False)