In [12]:
import mhcflurry
from mhcflurry import Class1PresentationPredictor

%matplotlib inline

import re
import os
import sys
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq
from Bio import Align
import matplotlib.pyplot as plt
import pandas as pd
from joblib import load
from igraph import Graph as igraph_graph, plot, rescale

# adding project folder to lib search path
# The `scripts` directory is looked up directly under the current working
# directory (presumably where the local modules imported below live - confirm).
# os.path.join is used instead of gluing path parts with "/" by hand, so the
# result is also well-formed when the working directory is the filesystem root
# and uses the platform's own separator.
project = os.path.join(os.getcwd(), 'scripts')
if project not in sys.path:
    # guard keeps re-running this cell from appending the same entry again
    sys.path.append(project)
    
import loader
import work_with_files
import classificator_k_mer
import tree_building
import patients_data
import data_prep_k_mer
import quantitative_analyze
import calculating_metrics

### Looking at patient p1 (V3 region)

In [22]:
# Load patient 'p1' and keep only the entries of its region table whose
# `name` equals 'V3'.
# NOTE(review): `pat.regions.region` is presumably a pandas DataFrame with a
# `name` column - confirm in patients_data.

pat = patients_data.Patient('p1')

region_table = pat.regions.region
pat_reg_data = region_table.loc[region_table.name == 'V3']

In [32]:
# Collect the `translated` entries of the selected region as plain strings.

pat_peptides = [str(translated) for translated in pat_reg_data.translated]

In [42]:
# cutting peptides into pieces
#
# Every peptide is scanned with a fixed-length sliding window; each window
# becomes one candidate piece for the predictor.

PIECE_LEN = 9    # number of residues in every window
PIECE_STEP = 2   # offset between the starts of two consecutive windows

pat_pept_pieces = []

for pepti in pat_peptides:
    # The last valid start offset is len(pepti) - PIECE_LEN; the `+ 1` keeps
    # the window that ends exactly on the peptide's last residue. Peptides
    # shorter than PIECE_LEN give an empty range and contribute nothing.
    for start in range(0, len(pepti) - PIECE_LEN + 1, PIECE_STEP):
        piece = pepti[start:start + PIECE_LEN]

        # will drop pieces with stop codon ('*'); they are printed so the
        # discarded windows stay visible in the cell output
        if '*' in piece:
            print(piece)
            continue

        pat_pept_pieces.append(piece)

NIHNSAP*R
HNSAP*RTC
SAP*RTCRN
P*RTCRNQL
RTRTSIL*N
RTSIL*NRR
SIL*NRRHN
L*NRRHNRR
KKSILCN*W
SILCN*WNK
LCN*WNKME
N*WNKMERN
ERNFKTGS*
NFKTGS*KI
KTGS*KIKK
GS*KIKKVL
*KIKKVL**
IKKVL**DN
KVL**DNSL
L**DNSL*T
*DNSL*TTL
NSL*TTLRR
L*TTLRRRS
EKLKKVL**
LKKVL**DN
KVL**DNSL
L**DNSL*T
*DNSL*TTL
NSL*TTLRR
L*TTLRRRS


In [23]:
# Alleles passed to the predictor: one HLA-A, one HLA-B and one HLA-C entry.

pat_hla = [
    'HLA-A*02:02',
    'HLA-B*08:15',
    'HLA-C*03:06',
]

In [49]:
# Build the presentation predictor and score every peptide piece against the
# patient's alleles.
# NOTE(review): `load()` without arguments presumably picks up mhcflurry's
# default downloaded models - confirm they are installed in this environment.

predictor = Class1PresentationPredictor.load()

pept_pred = predictor.predict(
    peptides=pat_pept_pieces,
    alleles=pat_hla,
    verbose=0,
)

In [50]:
# Preview the first ten prediction rows.
pept_pred.head(n=10)

Unnamed: 0,peptide,peptide_num,sample_name,affinity,best_allele,processing_score,presentation_score
0,VVSTQLLLN,0,sample1,6756.736838,HLA-C*03:06,0.01232,0.016115
1,STQLLLNGS,1,sample1,9434.888982,HLA-C*03:06,0.002679,0.011626
2,QLLLNGSLA,2,sample1,807.82665,HLA-A*02:02,0.000725,0.09403
3,LLNGSLAEE,3,sample1,2986.290252,HLA-C*03:06,0.045206,0.036655
4,NGSLAEEDI,4,sample1,3850.943457,HLA-C*03:06,0.000458,0.025226
5,SLAEEDIII,5,sample1,34.558743,HLA-A*02:02,0.043236,0.66534
6,AEEDIIIRS,6,sample1,11481.625933,HLA-C*03:06,0.412943,0.040886
7,EDIIIRSEN,7,sample1,12955.944312,HLA-C*03:06,0.014963,0.009185
8,IIIRSENIT,8,sample1,7547.506689,HLA-C*03:06,0.006119,0.014313
9,IRSENITNN,9,sample1,14037.152737,HLA-C*03:06,0.011346,0.008451


In [48]:
# Show the first ten rows whose predicted `affinity` is below 1000
# (presumably nM, where lower means tighter binding - confirm against the
# mhcflurry documentation).

pept_pred[pept_pred["affinity"] < 1000].head(10)

Unnamed: 0,peptide,peptide_num,sample_name,affinity,best_allele,processing_score,presentation_score
2,QLLLNGSLA,2,sample1,807.82665,HLA-A*02:02,0.000725,0.09403
5,SLAEEDIII,5,sample1,34.558743,HLA-A*02:02,0.043236,0.66534
10,SENITNNAK,10,sample1,720.872168,HLA-C*03:06,0.0298,0.11299
11,NITNNAKTI,11,sample1,164.981463,HLA-C*03:06,0.014315,0.308923
12,TNNAKTIIV,12,sample1,384.053326,HLA-C*03:06,0.513307,0.555321
13,NAKTIIVHL,13,sample1,26.802362,HLA-C*03:06,0.658349,0.957107
15,IIVHLKEPV,15,sample1,71.749204,HLA-A*02:02,0.010083,0.479957
21,CTRPSNNTI,21,sample1,80.899589,HLA-C*03:06,0.02281,0.464675
23,SNNTIASVR,23,sample1,580.641151,HLA-C*03:06,0.33507,0.314239
26,SVRIGPGQV,26,sample1,70.765338,HLA-C*03:06,0.039363,0.5091
