In [1]:
import importlib
# The module name "B-SIDER" contains a hyphen, so it cannot be named in a plain
# `import` statement; load it through importlib and bind it to `bsider`.
bsider = importlib.import_module("B-SIDER")

In [2]:
# Query peptide. This sequence is an amyloid aggregate that forms an
# antiparallel beta sheet with its reverse sequence (YYLLY).
# See https://www.rcsb.org/structure/4E0L
target = "YLLYY"

# Path to the database file
database = "./database/comp_seq_DB.db"

# Search settings:
#   parallel = False -> antiparallel beta sheet
#   min_frag         -> minimum length for search (the default value is 3)
parallel, min_frag = False, 3

In [3]:
# Create a B_SIDER instance, passing it the path of the fragment database
a = bsider.B_SIDER(database)

Fragment database: ./database/comp_seq_DB.db



In [4]:
seq_output = "ex_complementary_sequences.txt"
# Sequences found in the database will be saved in "ex_complementary_sequences.txt"

# Search the database for sequences complementary to `target`, using the
# `parallel` and `min_frag` settings defined earlier in the notebook.
a.comp_seq_search(target, parallel, min_frag, seq_output)

Searching for the database...

Target sequence: YLLYY
parallelity: Anti-parallel
Minimum fragment size: 3

Searching fragments: YLLYY (1 matches found)
Searching fragments: YLLY- (23 matches found)
Searching fragments: -LLYY (22 matches found)
Searching fragments: YLL-- (431 matches found)
Searching fragments: -LLY- (456 matches found)
Searching fragments: --LYY (358 matches found)

Complementary sequences:

FLNCY
FLNC-
VSTM-
GLLL-
NVIN-
EVYL-
FGLF-
CYSA-
HAKQ-
FRLQ-
WALF-
YQLV-
YLSY-
TISI-
VAPF-
VRVG-
FLLQ-
IHTM-
DLRS-
PYHK-
VLGE-
AEIL-
TIVY-
IVYS-
-AADS
-VALV
-FERN
-FVIF
-LNCY
-ALWA
-YSLL
-IHYL
-ITAA
-KKYL
-LTAA
-VTCG
-IKYL
-ITAA
-VLSG
-PELV
-SLRS
-SLHL
-IYYL
-ITAA
-IHYL
-ITAA
LGL--
GYS--
IVF--
WVL--
WVG--
DVC--
GLI--
MMF--
WVL--
DVC--
DVC--
GVA--
WVL--
DVC--
LDL--
CTH--
GVA--
WTM--
DVV--
FLN--
MLN--
KLM--
DYA--
IKE--
DVC--
YSF--
GVT--
WVG--
LMN--
RTF--
GAA--
VIT--
KLV--
SAF--
VFA--
LPL--
DVC--
VST--
GLL--
FSI--
YAG--
NVI--
EVY--
EVI--
VDA--
EII--
IRL--
NEV--
VVL--
GYL--
RVV--
FGL--


In [5]:
background_frequency = "./database/background_frequency.csv"
score_output = "ex_score_output.csv"

a.build_score_matrix(score_output, background_frequency)
# This builds a position-specific scoring matrix from the sequences found in the database.
# The background frequency adjusts the observed frequencies based on natural occurrences.
# The position-specific scoring matrix is saved in "ex_score_output.csv".

# The primary output is the best complementary sequence for the target ("WIVWW").


Building a scoring matrix for YLLYY

Background amino acid frequency information: ./database/background_frequency.csv

Scoring matrix for the target:
Pos	1	2	3	4	5
Target	Y	L	L	Y	Y
A	-0.17	-0.10	-0.06	-0.30	-0.38
C	-0.13	0.33	-0.15	-0.14	0.05
D	0.30	1.30	1.47	0.93	0.99
E	0.25	0.36	0.21	0.28	0.38
F	-0.67	-0.79	-0.68	-0.67	-0.29
G	-0.07	-0.02	0.15	0.32	0.41
H	0.38	-0.10	-0.13	-0.34	0.35
I	-0.77	-1.06	-0.76	-0.84	-0.89
K	0.50	0.62	0.62	0.57	0.45
L	-0.57	-0.69	-0.76	-0.53	-0.42
M	0.34	0.11	-0.92	-0.41	-0.02
N	1.38	0.94	0.63	1.17	0.91
P	1.96	1.99	1.67	2.27	2.30
Q	-0.20	-0.09	0.61	-0.02	-0.20
R	-0.21	0.07	-0.08	0.17	0.49
S	0.41	0.56	0.31	-0.02	0.33
T	0.04	-0.10	-0.12	0.25	0.49
V	-0.85	-0.99	-0.97	-0.96	-0.95
W	-1.37	-0.60	-0.60	-0.97	-2.45
Y	-0.64	-0.80	-0.62	-0.89	-0.70
The scoring matrix was saved in ex_score_output.csv
The best complementarity peptide for YLLYY: WIVWW
Parallelity: Anti-parallel
Complementarity score: -6.825028


In [11]:
# The target is an amyloid aggregate and forms an antiparallel beta sheet with
# its reverse sequence, so the reversed target is used as the query: we
# calculate how likely it is to be a complementary beta-sheet sequence.
randnum = 10000
comp_seq = "".join(reversed(target))

# This calculates the rank of the query complementary sequence against 10,000 random sequences.
# Though there is no definite cut-off value, our benchmark on amyloids shows that
# sequences with rank < 10 % form beta sheets.
# NOTE(review): no random seed is set in the cells shown here, so the random
# sequences presumably differ between runs -- confirm whether B_SIDER allows seeding.
a.compare_complementarity(comp_seq, randnum)


Comparing complementarity against YLLYY for YYLLY using 10000 random sequences

Random complementarity sequences and scores:

YPADF 1.931393
YRRES -0.047421
GFDEM 0.871139
QRKRL 0.246674
TWCHL -1.467589
MMKSI 0.161352
FNPHI 0.719618
MWWIG -1.290900
LPPQQ 2.878318
GWPTD 2.249001
WMDQY -0.508465
HDIGY 0.541916
RPMFW -2.262567
MGTGH 0.878763
KGDPF 3.921430
KMVTC -0.069390
VNHMT 0.046731
MGGGH 1.143999
PDRIA 1.960242
DKWWV -1.603013
KKFRF 0.319032
VWDTC 0.324788
CLDVI -1.192041
FQSFY -1.823958
KKYIF -0.634944
FGIQM -1.483109
SAAAV -1.002933
NHHED 2.420938
DPENY 2.966989
TLPWD 1.054999
NYMGS 0.318350
QPYNP 4.638458
AVVDP 1.098783
YPWTV 0.046152
ECKES 1.806851
CFTKC -0.417398
PVVPA 1.891667
VYKTK -0.319997
LHHKR 0.265976
ASASF 0.017413
MCDPN 5.319605
TECCY -0.585127
HYPMQ 0.645094
KVDIA -0.241773
AVSCC -0.941775
HPPVN 3.991682
HFMES -0.726089
SQHCN 0.963766
AKRCK 0.676107
MSNQS 1.846075
YTSHK -0.322064
PLQCC 1.805669
YDWNG 1.629489
GIPMK 0.588335
CEVKA -0.549702
LAKCF -0.469800
NPFLN 3.0744

FVIRQ -2.447533
LVTVV -3.573977
INYQD 0.529550
PVVRG 0.584099
GLHMM -1.308113
LLKAR -0.443824
KHTAV -0.963834
GEDEI 1.150361
RIGWM -2.106507
ALCRR -0.355440
QHCTP 2.103927
ATYQP 1.390464
ANNSA 0.996080
DGGSI -0.488592
ALVQT -1.357952
GIISE -1.529610
RPQSW -0.084975
GFDTQ 0.657353
FQMNK -0.056633
QQIWL -2.433221
TPLLW -1.712140
DPQMH 2.844787
QRTGT 0.572129
ELMPR 1.401649
DMYLW -3.201240
YRPDY 1.335517
PNHFW -0.349271
SPLSA 1.234754
PGLGA 1.121349
PILWR -0.338672
LNKPC 3.314233
RLNEA -0.371375
TQNID 0.734255
EARQE 0.425530
QVFAC -2.113104
WRMFF -3.171562
MIDDG 2.094936
PMLQR 1.780369
RTDYR 0.757748
HYWQV -1.985829
VDPQQ 1.907348
VMDPF 2.708511
WGTYG -1.994676
TSTNF 1.366340
YFETH -0.622303
TKELQ 0.142793
FYMWG -2.940965
ASAWN 0.271357
PKGEA 2.630489
PRCMY 0.782118
PNRYH 2.282992
WDFAI -1.936561
SCILM -0.568606
MFKSV -0.796576
SYPHW -1.506427
DVPGK 1.753294
ETSAL -0.262853
HCRAW -2.138807
FRMNR 0.142264
RWKFW -3.313763
DKDNL 3.131962
GTCRE 0.231370
YAPVA -0.414987
TIDCS 0.648489
WNFYR -1

MAEPN 3.631330
CWCTT -0.141602
PNKIS 3.019265
YDIKI -0.416901
EYVQV -2.477761
YFRHW -4.314684
QYLCG -1.480292
VMFPA 0.466932
YNHTF 0.129244
VRQRW -2.446150
HGIAR -0.219661
DIVMS -1.811177
QQLMP 0.845354
IWLWD -2.104504
CRAQF -0.419414
GPPFG 3.330706
NSNVQ 1.404592
RMCQR 0.213237
HKQVN 1.560111
CPTHN 2.319772
MQQAK 1.016817
DVQCW -2.668782
ERLVM -1.410611
YEVVS -1.884811
FFQPS 1.744667
AAWRY -1.410114
NTHQT 1.620870
FQYQK -0.944910
LPTME 1.283295
CPPNL 4.278180
PMHQC 1.971803
SPIVS 1.011782
SIKRL -0.283952
AQEQE 0.312587
MEPWL 0.985749
MQNDQ 1.613280
LQHRD 0.376437
HWDNQ 2.213707
CTQNK 2.003929
NDYDQ 2.789270
RTPDH 2.649227
EQNHQ 0.250860
CVCVI -3.117773
QPKDN 4.259925
TACDF 0.431622
HAGFL -0.670849
RDNEC 2.045611
KFHLY -1.659059
THGDK 1.480252
MTQQQ 0.640177
APNGN 3.678651
AGTMS -0.387745
EWCPM 1.750090
MLTTN 0.705494
LGEKT 0.686127
QHVWQ -2.433317
KSNYF 0.503407
MRPLC 1.610678
ADIFV -1.247402
HKTAM 0.561922
VPSWV -0.466969
YHPMS 0.857439
AQAHS -0.329498
PNSTW 1.009515
IKFMI -2.123304


TCAEP 2.884669
YNTAW -2.571906
KCYYC -0.642557
FFKEI -1.453506
VGFPP 3.015516
SPNSI 2.115762
HGVMA -1.404211
ISGEV -0.730298
TNPTN 3.820929
NWLTE 0.647409
EDSAS 1.887879
TNYNT 2.021610
EQHWG -0.530303
LYCQK -1.077791
SCLIK -0.417083
VQNQI -1.214380
FHMIP -0.224125
CWTLR -0.886565
QHACS -0.156175
SNGQF 1.196738
HCPFD 2.696903
RMMWD -0.994831
KKMCL -0.358790
VDSCD 1.618042
AWFEV -2.118866
FYNHD -0.179937
TTRNH 1.380072
KVSDC 0.794850
LSNMC 0.266938
DETQM 0.506544
AAQRF 0.216695
YMAFE -0.888519
MADDF 2.357164
KMHRW -1.815139
DIQTL -0.324794
LYCDH -0.227420
MGLWQ -1.610840
EYTMA -1.445014
YEHIG -0.851043
SHTII -1.533331
EQPLN 2.218127
LKIMK -0.658164
GTPQF 1.202726
PPKEA 4.473505
GCLNI -0.225721
LVTTV -2.364696
LSVFN -0.734369
LNSRE 1.232546
VPFMG 0.467608
YQNFY -1.475395
FLQLV -2.219009
EFNQR 0.556444
QAKWC -0.597523
MFWWP 0.277823
TANAS 0.596618
RSFLY -1.556806
GMILI -2.137663
QLVRA -2.063922
ISHPL 1.500531
LWCEC -0.989259
DVCLK -0.921652
PWANY 1.774949
KYLML -1.888363
PLQKN 3.376415
YRH

YFAKL -1.346541
AFVKC -1.316505
YFKID -0.663740
VTAHA -1.721653
ACWCL -1.007508
HTAGQ 0.338449
SHHWQ -0.990328
AGHII -2.059647
LCNSY -0.331461
EIYQS -1.116271
MTTEM 0.393130
ARNWY -1.138313
IAVAN -1.233331
NNVKM 1.906980
AGLEK -0.229739
SDSRS 2.514985
VTKWS -0.960466
QTTQM -0.441683
FRGVF -1.692545
TAPYT 1.207597
CMGKD 1.692619
SLEWR -0.545830
YHEQF -0.831094
HWVQG -0.802625
NHFWR 0.124009
KWNAV -0.722845
KFIDV -1.070105
VCCLI -2.092052
CSLFS -0.671837
LHPFH 0.693138
VMIFM -2.185396
SYHIP 0.937689
LHVNK -0.012882
IEVSQ -1.606315
SFSDS 1.182692
PQRER 2.554547
YLRSI -2.325863
CDKRC 2.012033
PWLDC 1.586811
MDWDQ 1.772883
VTEVW -4.144802
NRHCK 1.635768
STPEG 2.668735
IGPLE 0.729024
SCHIG 0.168311
HTKTY 0.451214
TQAEC 0.220294
MLWSY -1.665573
YWKTF -0.659007
AACHD 0.226711
AEFPT 2.262318
NMEPE 4.342289
TWHAV -1.936639
APKAA 1.756144
DIGYE -1.128898
IWNIQ -1.783990
DRRQK 0.719330
PNHGN 4.009837
QVPNK 2.106768
CTALK -0.363908
KSICE 0.541868
CPIKI 0.785858
QCRRF -0.073843
ELAGM -0.188052
LIPDH

EESVC 0.005819
QHWVQ -2.056840
YAKDD 1.799779
DYNVW -3.284184
DEQGC 1.637353
LYMPD 0.982668
IWTVH -2.091462
WWSLT -1.704502
DMVWM -1.551959
RMLKG 0.114361
RDICS 0.528366
DANYK 0.379355
LMWMT -0.977881
FPYRW -1.584698
TCTYE -0.260393
DQLTV -1.251451
SPIRD 2.803709
DECIL -0.760146
TYPPI 2.295829
VSYQT -0.436039
SKKIN 1.719754
FACVW -4.338781
MAWAT -0.174773
ISNHT 0.569451
FKCNM 0.948404
VKVVM -2.174654
NQGGP 4.057671
CMSQD 1.259106
YFRGY -1.900956
TPTQA 1.519827
EYEAC -0.582381
KWKYK 0.076621
YMHRW -2.954707
TWQCW -2.533425
VYFTP 0.228598
VHYNN 0.516151
SHYAI -1.498373
NKELV 0.733876
GKTQR 0.904572
QCLHA -1.349776
EKDGY 1.964172
EFFQD -0.242001
QYRQV -2.040397
HRKQI 0.165426
VKGCK 0.237026
MPFLV 0.181218
LMEFM -0.932083
NPPCE 5.284713
NNECQ 2.196909
EQAMA -0.683076
VGCGR -0.215192
NYTVY -1.191241
ETLSM -0.644053
GMVWT -1.411488
VWMCA -2.877868
RMSCK 0.516964
TVNGN 0.917323
EQVQA -1.205621
IYNWD -0.911146
SQICP 1.724248
EPKCQ 2.525644
WMDTT 0.946640
CEEWA -0.906116
RTADQ 0.362705
IYFFV -3

FATRE -0.339848
RDEPQ 3.366406
CHTQF -0.645332
GLHTE -0.257143
CQHKL -0.200980
NKQPW 2.419740
NKGNP 5.611819
HYTPF 1.442761
ARMSV -1.984263
TIWTK -0.920392
QWAYD -0.753625
HHHTE 0.777260
MPSCS 2.834817
HLIIR -1.421861
VNGDQ 0.975530
GNINV 0.337721
RKKGY 0.650973
RMVIE -1.536890
MKWES 0.967685
LLYKC -1.249532
CMTRV -0.915943
QLYIW -4.798060
IQAHV -2.203342
KTAEA 0.236127
QIFMA -2.720790
DNPRA 2.700808
FILNI -2.213911
IDYLW -3.072451
VCDIQ -0.093314
WPILC -0.618358
DYQYQ -0.982179
CPLPT 3.853157
TVMEL -2.005515
CIFVP -0.528957
LFWGG -1.232536
CMHIP 1.301672
TTEVY -1.499736
VHTEV -1.728204
GSSVY -0.861928
AHVFS -1.580642
ENTGH 1.753022
GPGWR 1.589938
DFRRC -0.364092
ICVQV -2.378645
IRGFI -2.106718
MVQKW -1.913210
KRWHR 0.115596
HHEDY 0.726529
SEYCI -0.877705
YCIHW -3.867604
IWYNF -1.109821
ARINM 0.294104
QWWNN 0.681491
QDGIR 0.900355
GREQC 0.251131
DPHHW -0.638230
KTNGA 0.967887
AKYLL -1.126231
AFWVI -3.418636
KVMGQ -1.289465
IMANP 2.742477
FELIH -1.560654
AIPDW -1.084270
TMMVC -1.676946


FHHDH 0.387252
NKWIW -1.901621
HTWQW -2.795038
MGVCA -1.165401
GYQPN 2.926268
AYDIA -0.718491
WLSYR -2.157346
LVCEW -3.881523
CIRVS -1.904828
KRGGS 1.370822
MIPEW -1.221614
PILYP 1.548189
LHQEY -0.470384
ECPEG 2.935151
MFDDG 2.362539
TGHGH 0.560309
IGNCF -0.587759
VLWPV -0.817848
MTPML 1.092340
AVVYN -2.112028
LMCWY -2.277607
YVLMW -5.252847
SFEIA -1.393871
MPQHK 3.059987
FMIDP 1.910310
PRPKQ 4.081492
LSLQP 1.515018
QEMVH -1.360758
SANHM 0.579545
IITLG -2.068444
HKYQS 0.687870
RWNIY -1.721696
SNMNW -0.851785
WNHQF -0.864297
VWREI -2.143201
WRDPH 2.792490
KVRYP 0.828335
LGIGN -0.112052
PHQIK 2.091740
QWYMC -1.772147
WWHAR -1.916609
VMFYK -1.859326
FKDVK 0.913045
EGDLE 1.550364
RNNMC 1.004313
PESIT 2.277132
VLIDC -1.309151
WSAGM -0.565142
DQTAV -1.157056
ECMDG 1.001735
FIQYR -1.524621
AMVRK -0.417050
SSWNH 1.884382
GYQDT 1.169969
LAMNL -0.839220
PCKFF 1.953844
QQWIK -1.278822
MHYEL -0.516338
RTSHV -1.287019
VRKGE 0.548529
YATVT -1.333622
CDRIS 0.576217
CLCFP 0.659567
HQSGV -0.033834
IMEL