# Reverse "indexing"
Instead of trying to index all of the k-mers in a database and store it in RAM, lets pre-process the spectra and do 1 sweep through the database, finding the best scoring k-mers in the database for each spectrum

### Steps
1. Load spectra into memory
2. Turn each spectra into a sparse 1s and 0s matrix 
3. Load the database into RAM
4. For each protein
    1. For each MAX_LENGTH-mer
        1. Generate the spectrum for this k-mer
        2. Sparsify the spectrum
        3. Calculate the dot product of this sparse k-mer against the input
        4. For each spectrum
            1. Keep track of the top X scoring peptides by (protein, position)
    2. Do for both b and y ions (seperately)

In [1]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
module_path = os.path.abspath(os.path.join('../..'))
if module_path not in sys.path:
    sys.path.append(module_path)
    
from pyteomics import fasta
from collections import defaultdict
from src.sequence.gen_spectra import gen_spectrum
from src.file_io import mzML
from src.objects import Spectrum
from ms_deisotope import MSFileReader

import numpy as np

In [2]:
def longest_array(window, length):
    return int(np.ceil(1/window * length * 186.079313))

In [3]:
def longest_peptide(max_mass):
    return int(np.ceil(max_mass / 57.021464))

In [4]:
def sparse_it(spectrum, window, length):
    sparse = np.zeros(longest_array(window, length))
    for mz in spectrum:
        mz_direct = int(mz/window)
        sparse[mz_direct] = 1
    
    return sparse

In [5]:
fasta_file = '/Users/zacharymcgrath/Desktop/nod2 data/filteredNOD2.fasta'
spectra_file = '/Users/zacharymcgrath/Desktop/nod2 data/single/singleRealSpectrum.mzml'
window_size = .01

## 1. Load spectra into memory

In [6]:
spectra = MSFileReader(spectra_file)
for s in spectra:
    print(s)
    

In [7]:
max_mass = max([x for spec in spectra for x in spec.spectrum])
print(f'Max mass: {max_mass}')
print(f'Max peptide length: {longest_peptide(max_mass)}')
max_length = longest_peptide(max_mass)
avg_mass = np.mean([x for spec in spectra for x in spec.spectrum])
avg_len = longest_peptide(avg_mass)
print(f'Average mass: {avg_mass}')
print(f'Average peptide length: {avg_len}')

Max mass: 1311.6202392578125
Max peptide length: 24
Average mass: 374.51652265760634
Average peptide length: 7


## 2. Sparsify the spectra

In [8]:
sparsified_spectra = np.zeros((len(spectra), longest_array(window_size, max_length)))

In [9]:
for i, spec in enumerate(spectra):
    print(f'\rSparsifying spectrum {i}/{len(spectra)}', end='')
    sparsified_spectra[i] = sparse_it(spec.spectrum, window_size, max_length)

Sparsifying spectrum 0/9Sparsifying spectrum 1/9Sparsifying spectrum 2/9Sparsifying spectrum 3/9Sparsifying spectrum 4/9Sparsifying spectrum 5/9Sparsifying spectrum 6/9Sparsifying spectrum 7/9Sparsifying spectrum 8/9

## 3. Load database into RAM

In [10]:
prots = {}
for i, prot in enumerate(fasta.read(fasta_file)):
    prot_name = prot.description.split('|')[-1].split(' ')[0]
    prots[prot_name] = prot.sequence
    

## 4. Go through each protein and each MAX_LENGTH-mer and calculate the scores

In [11]:
top_x = 10
top_b = defaultdict(list)
top_y = defaultdict(list)
top_by = defaultdict(list)

def add_score(ion, spec_idx, score, prot, start_idx):
    d = top_b if ion == 'b' else (top_y if ion == 'y' else top_by)
    
    new_entry = (score, prot, start_idx)
    
    if len(d[spec_idx]) > top_x and score >= d[spec_idx][-1][0]:
        d[spec_idx].append(new_entry)
        d[spec_idx].sort(key=lambda x: x[0], reverse=True)
        d[spec_idx] = d[spec_idx][:top_x]
        
    else:
        d[spec_idx] = sorted(d[spec_idx] + [new_entry], key=lambda x: x[0], reverse=True)
        
def score_kmer(kmer, prot_name, start_idx):
    b_spec = gen_spectrum(kmer, ion='b')['spectrum']
    y_spec = gen_spectrum(kmer, ion='y')['spectrum']
    spec = gen_spectrum(kmer)['spectrum']
    
    sparse_b = sparse_it(b_spec, window_size, max_length)
    sparse_y = sparse_it(y_spec, window_size, max_length)
    sparse = sparse_it(spec, window_size, max_length)
    
    b_scores = np.dot(sparsified_spectra, sparse_b)
    y_scores = np.dot(sparsified_spectra, sparse_y)
    scores = np.dot(sparsified_spectra, sparse)
    
    [add_score('b', i, score, prot_name, start_idx) for i, score in enumerate(b_scores)]
    [add_score('y', i, score, prot_name, start_idx + len(kmer) - 1) for i, score in enumerate(b_scores)]
    [add_score('by', i, score, prot_name, start_idx) for i, score in enumerate(scores)]
    

In [12]:
plen = len(prots)

for i, (prot_name, prot_seq) in enumerate(prots.items()):
    
    # if the protein is less than our max_len, just do 1 calculation
    if len(prot_seq) < max_length:
        score_kmer(prot_seq, prot_name, 0)
        
    # otherwise go through the entire thing
    for j in range(len(prot_seq) - max_length):
        print(f'\rOn protein {i}/{plen} [{int(100 * (i+1) / plen)}%]     ({int(100 * (j+1)/(len(prot_seq)-max_length))}% Done with protein)', end='')

        score_kmer(prot_seq[j:j+max_length], prot_name, j)
        

On protein 0/279 [0%]     (0% Done with protein)On protein 0/279 [0%]     (1% Done with protein)On protein 0/279 [0%]     (2% Done with protein)On protein 0/279 [0%]     (3% Done with protein)On protein 0/279 [0%]     (4% Done with protein)On protein 0/279 [0%]     (5% Done with protein)On protein 0/279 [0%]     (6% Done with protein)On protein 0/279 [0%]     (7% Done with protein)On protein 0/279 [0%]     (8% Done with protein)On protein 0/279 [0%]     (9% Done with protein)On protein 0/279 [0%]     (9% Done with protein)On protein 0/279 [0%]     (10% Done with protein)On protein 0/279 [0%]     (11% Done with protein)On protein 0/279 [0%]     (12% Done with protein)On protein 0/279 [0%]     (13% Done with protein)On protein 0/279 [0%]     (14% Done with protein)On protein 0/279 [0%]     (15% Done with protein)On protein 0/279 [0%]     (16% Done with protein)On protein 0/279 [0%]     (17% Done with protein)On protein 0/279 [0%]     (18% Done with protein)On protein

On protein 1/279 [0%]     (46% Done with protein)On protein 1/279 [0%]     (46% Done with protein)On protein 1/279 [0%]     (46% Done with protein)On protein 1/279 [0%]     (47% Done with protein)On protein 1/279 [0%]     (47% Done with protein)On protein 1/279 [0%]     (47% Done with protein)On protein 1/279 [0%]     (47% Done with protein)On protein 1/279 [0%]     (48% Done with protein)On protein 1/279 [0%]     (48% Done with protein)On protein 1/279 [0%]     (48% Done with protein)On protein 1/279 [0%]     (48% Done with protein)On protein 1/279 [0%]     (49% Done with protein)On protein 1/279 [0%]     (49% Done with protein)On protein 1/279 [0%]     (49% Done with protein)On protein 1/279 [0%]     (50% Done with protein)On protein 1/279 [0%]     (50% Done with protein)On protein 1/279 [0%]     (50% Done with protein)On protein 1/279 [0%]     (50% Done with protein)On protein 1/279 [0%]     (51% Done with protein)On protein 1/279 [0%]     (51% Done with protein)

On protein 2/279 [1%]     (51% Done with protein)On protein 2/279 [1%]     (52% Done with protein)On protein 2/279 [1%]     (53% Done with protein)On protein 2/279 [1%]     (53% Done with protein)On protein 2/279 [1%]     (54% Done with protein)On protein 2/279 [1%]     (55% Done with protein)On protein 2/279 [1%]     (56% Done with protein)On protein 2/279 [1%]     (56% Done with protein)On protein 2/279 [1%]     (57% Done with protein)On protein 2/279 [1%]     (58% Done with protein)On protein 2/279 [1%]     (59% Done with protein)On protein 2/279 [1%]     (59% Done with protein)On protein 2/279 [1%]     (60% Done with protein)On protein 2/279 [1%]     (61% Done with protein)On protein 2/279 [1%]     (62% Done with protein)On protein 2/279 [1%]     (62% Done with protein)On protein 2/279 [1%]     (63% Done with protein)On protein 2/279 [1%]     (64% Done with protein)On protein 2/279 [1%]     (65% Done with protein)On protein 2/279 [1%]     (65% Done with protein)

On protein 3/279 [1%]     (19% Done with protein)On protein 3/279 [1%]     (19% Done with protein)On protein 3/279 [1%]     (19% Done with protein)On protein 3/279 [1%]     (19% Done with protein)On protein 3/279 [1%]     (19% Done with protein)On protein 3/279 [1%]     (19% Done with protein)On protein 3/279 [1%]     (19% Done with protein)On protein 3/279 [1%]     (19% Done with protein)On protein 3/279 [1%]     (20% Done with protein)On protein 3/279 [1%]     (20% Done with protein)On protein 3/279 [1%]     (20% Done with protein)On protein 3/279 [1%]     (20% Done with protein)On protein 3/279 [1%]     (20% Done with protein)On protein 3/279 [1%]     (20% Done with protein)On protein 3/279 [1%]     (20% Done with protein)On protein 3/279 [1%]     (20% Done with protein)On protein 3/279 [1%]     (20% Done with protein)On protein 3/279 [1%]     (20% Done with protein)On protein 3/279 [1%]     (21% Done with protein)On protein 3/279 [1%]     (21% Done with protein)

On protein 3/279 [1%]     (39% Done with protein)On protein 3/279 [1%]     (39% Done with protein)On protein 3/279 [1%]     (39% Done with protein)On protein 3/279 [1%]     (39% Done with protein)On protein 3/279 [1%]     (39% Done with protein)On protein 3/279 [1%]     (39% Done with protein)On protein 3/279 [1%]     (39% Done with protein)On protein 3/279 [1%]     (39% Done with protein)On protein 3/279 [1%]     (40% Done with protein)On protein 3/279 [1%]     (40% Done with protein)On protein 3/279 [1%]     (40% Done with protein)On protein 3/279 [1%]     (40% Done with protein)On protein 3/279 [1%]     (40% Done with protein)On protein 3/279 [1%]     (40% Done with protein)On protein 3/279 [1%]     (40% Done with protein)On protein 3/279 [1%]     (40% Done with protein)On protein 3/279 [1%]     (40% Done with protein)On protein 3/279 [1%]     (41% Done with protein)On protein 3/279 [1%]     (41% Done with protein)On protein 3/279 [1%]     (41% Done with protein)

On protein 3/279 [1%]     (67% Done with protein)On protein 3/279 [1%]     (67% Done with protein)On protein 3/279 [1%]     (67% Done with protein)On protein 3/279 [1%]     (67% Done with protein)On protein 3/279 [1%]     (67% Done with protein)On protein 3/279 [1%]     (67% Done with protein)On protein 3/279 [1%]     (67% Done with protein)On protein 3/279 [1%]     (67% Done with protein)On protein 3/279 [1%]     (67% Done with protein)On protein 3/279 [1%]     (68% Done with protein)On protein 3/279 [1%]     (68% Done with protein)On protein 3/279 [1%]     (68% Done with protein)On protein 3/279 [1%]     (68% Done with protein)On protein 3/279 [1%]     (68% Done with protein)On protein 3/279 [1%]     (68% Done with protein)On protein 3/279 [1%]     (68% Done with protein)On protein 3/279 [1%]     (68% Done with protein)On protein 3/279 [1%]     (68% Done with protein)On protein 3/279 [1%]     (69% Done with protein)On protein 3/279 [1%]     (69% Done with protein)

On protein 4/279 [1%]     (12% Done with protein)On protein 4/279 [1%]     (12% Done with protein)On protein 4/279 [1%]     (13% Done with protein)On protein 4/279 [1%]     (14% Done with protein)On protein 4/279 [1%]     (15% Done with protein)On protein 4/279 [1%]     (15% Done with protein)On protein 4/279 [1%]     (16% Done with protein)On protein 4/279 [1%]     (17% Done with protein)On protein 4/279 [1%]     (17% Done with protein)On protein 4/279 [1%]     (18% Done with protein)On protein 4/279 [1%]     (19% Done with protein)On protein 4/279 [1%]     (20% Done with protein)On protein 4/279 [1%]     (20% Done with protein)On protein 4/279 [1%]     (21% Done with protein)On protein 4/279 [1%]     (22% Done with protein)On protein 4/279 [1%]     (22% Done with protein)On protein 4/279 [1%]     (23% Done with protein)On protein 4/279 [1%]     (24% Done with protein)On protein 4/279 [1%]     (25% Done with protein)On protein 4/279 [1%]     (25% Done with protein)

On protein 5/279 [2%]     (48% Done with protein)On protein 5/279 [2%]     (48% Done with protein)On protein 5/279 [2%]     (49% Done with protein)On protein 5/279 [2%]     (49% Done with protein)On protein 5/279 [2%]     (49% Done with protein)On protein 5/279 [2%]     (49% Done with protein)On protein 5/279 [2%]     (49% Done with protein)On protein 5/279 [2%]     (50% Done with protein)On protein 5/279 [2%]     (50% Done with protein)On protein 5/279 [2%]     (50% Done with protein)On protein 5/279 [2%]     (50% Done with protein)On protein 5/279 [2%]     (50% Done with protein)On protein 5/279 [2%]     (51% Done with protein)On protein 5/279 [2%]     (51% Done with protein)On protein 5/279 [2%]     (51% Done with protein)On protein 5/279 [2%]     (51% Done with protein)On protein 5/279 [2%]     (51% Done with protein)On protein 5/279 [2%]     (52% Done with protein)On protein 5/279 [2%]     (52% Done with protein)On protein 5/279 [2%]     (52% Done with protein)

On protein 6/279 [2%]     (26% Done with protein)On protein 6/279 [2%]     (26% Done with protein)On protein 6/279 [2%]     (26% Done with protein)On protein 6/279 [2%]     (27% Done with protein)On protein 6/279 [2%]     (27% Done with protein)On protein 6/279 [2%]     (27% Done with protein)On protein 6/279 [2%]     (28% Done with protein)On protein 6/279 [2%]     (28% Done with protein)On protein 6/279 [2%]     (28% Done with protein)On protein 6/279 [2%]     (29% Done with protein)On protein 6/279 [2%]     (29% Done with protein)On protein 6/279 [2%]     (29% Done with protein)On protein 6/279 [2%]     (30% Done with protein)On protein 6/279 [2%]     (30% Done with protein)On protein 6/279 [2%]     (30% Done with protein)On protein 6/279 [2%]     (31% Done with protein)On protein 6/279 [2%]     (31% Done with protein)On protein 6/279 [2%]     (31% Done with protein)On protein 6/279 [2%]     (32% Done with protein)On protein 6/279 [2%]     (32% Done with protein)

On protein 7/279 [2%]     (53% Done with protein)On protein 7/279 [2%]     (53% Done with protein)On protein 7/279 [2%]     (54% Done with protein)On protein 7/279 [2%]     (54% Done with protein)On protein 7/279 [2%]     (54% Done with protein)On protein 7/279 [2%]     (55% Done with protein)On protein 7/279 [2%]     (55% Done with protein)On protein 7/279 [2%]     (56% Done with protein)On protein 7/279 [2%]     (56% Done with protein)On protein 7/279 [2%]     (57% Done with protein)On protein 7/279 [2%]     (57% Done with protein)On protein 7/279 [2%]     (58% Done with protein)On protein 7/279 [2%]     (58% Done with protein)On protein 7/279 [2%]     (58% Done with protein)On protein 7/279 [2%]     (59% Done with protein)On protein 7/279 [2%]     (59% Done with protein)On protein 7/279 [2%]     (60% Done with protein)On protein 7/279 [2%]     (60% Done with protein)On protein 7/279 [2%]     (61% Done with protein)On protein 7/279 [2%]     (61% Done with protein)

On protein 8/279 [3%]     (67% Done with protein)On protein 8/279 [3%]     (67% Done with protein)On protein 8/279 [3%]     (68% Done with protein)On protein 8/279 [3%]     (68% Done with protein)On protein 8/279 [3%]     (68% Done with protein)On protein 8/279 [3%]     (68% Done with protein)On protein 8/279 [3%]     (69% Done with protein)On protein 8/279 [3%]     (69% Done with protein)On protein 8/279 [3%]     (69% Done with protein)On protein 8/279 [3%]     (70% Done with protein)On protein 8/279 [3%]     (70% Done with protein)On protein 8/279 [3%]     (70% Done with protein)On protein 8/279 [3%]     (70% Done with protein)On protein 8/279 [3%]     (71% Done with protein)On protein 8/279 [3%]     (71% Done with protein)On protein 8/279 [3%]     (71% Done with protein)On protein 8/279 [3%]     (72% Done with protein)On protein 8/279 [3%]     (72% Done with protein)On protein 8/279 [3%]     (72% Done with protein)On protein 8/279 [3%]     (72% Done with protein)

On protein 9/279 [3%]     (69% Done with protein)On protein 9/279 [3%]     (70% Done with protein)On protein 9/279 [3%]     (70% Done with protein)On protein 9/279 [3%]     (70% Done with protein)On protein 9/279 [3%]     (71% Done with protein)On protein 9/279 [3%]     (71% Done with protein)On protein 9/279 [3%]     (71% Done with protein)On protein 9/279 [3%]     (71% Done with protein)On protein 9/279 [3%]     (72% Done with protein)On protein 9/279 [3%]     (72% Done with protein)On protein 9/279 [3%]     (72% Done with protein)On protein 9/279 [3%]     (73% Done with protein)On protein 9/279 [3%]     (73% Done with protein)On protein 9/279 [3%]     (73% Done with protein)On protein 9/279 [3%]     (73% Done with protein)On protein 9/279 [3%]     (74% Done with protein)On protein 9/279 [3%]     (74% Done with protein)On protein 9/279 [3%]     (74% Done with protein)On protein 9/279 [3%]     (75% Done with protein)On protein 9/279 [3%]     (75% Done with protein)

On protein 11/279 [4%]     (52% Done with protein)On protein 11/279 [4%]     (52% Done with protein)On protein 11/279 [4%]     (53% Done with protein)On protein 11/279 [4%]     (53% Done with protein)On protein 11/279 [4%]     (54% Done with protein)On protein 11/279 [4%]     (54% Done with protein)On protein 11/279 [4%]     (55% Done with protein)On protein 11/279 [4%]     (55% Done with protein)On protein 11/279 [4%]     (56% Done with protein)On protein 11/279 [4%]     (56% Done with protein)On protein 11/279 [4%]     (57% Done with protein)On protein 11/279 [4%]     (57% Done with protein)On protein 11/279 [4%]     (58% Done with protein)On protein 11/279 [4%]     (58% Done with protein)On protein 11/279 [4%]     (59% Done with protein)On protein 11/279 [4%]     (59% Done with protein)On protein 11/279 [4%]     (60% Done with protein)On protein 11/279 [4%]     (60% Done with protein)On protein 11/279 [4%]     (61% Done with protein)On protein 11/279 [4%]     (61

On protein 13/279 [5%]     (18% Done with protein)On protein 13/279 [5%]     (18% Done with protein)On protein 13/279 [5%]     (18% Done with protein)On protein 13/279 [5%]     (19% Done with protein)On protein 13/279 [5%]     (19% Done with protein)On protein 13/279 [5%]     (19% Done with protein)On protein 13/279 [5%]     (19% Done with protein)On protein 13/279 [5%]     (19% Done with protein)On protein 13/279 [5%]     (19% Done with protein)On protein 13/279 [5%]     (20% Done with protein)On protein 13/279 [5%]     (20% Done with protein)On protein 13/279 [5%]     (20% Done with protein)On protein 13/279 [5%]     (20% Done with protein)On protein 13/279 [5%]     (20% Done with protein)On protein 13/279 [5%]     (20% Done with protein)On protein 13/279 [5%]     (21% Done with protein)On protein 13/279 [5%]     (21% Done with protein)On protein 13/279 [5%]     (21% Done with protein)On protein 13/279 [5%]     (21% Done with protein)On protein 13/279 [5%]     (21

On protein 13/279 [5%]     (77% Done with protein)On protein 13/279 [5%]     (77% Done with protein)On protein 13/279 [5%]     (77% Done with protein)On protein 13/279 [5%]     (78% Done with protein)On protein 13/279 [5%]     (78% Done with protein)On protein 13/279 [5%]     (78% Done with protein)On protein 13/279 [5%]     (78% Done with protein)On protein 13/279 [5%]     (78% Done with protein)On protein 13/279 [5%]     (78% Done with protein)On protein 13/279 [5%]     (79% Done with protein)On protein 13/279 [5%]     (79% Done with protein)On protein 13/279 [5%]     (79% Done with protein)On protein 13/279 [5%]     (79% Done with protein)On protein 13/279 [5%]     (79% Done with protein)On protein 13/279 [5%]     (79% Done with protein)On protein 13/279 [5%]     (80% Done with protein)On protein 13/279 [5%]     (80% Done with protein)On protein 13/279 [5%]     (80% Done with protein)On protein 13/279 [5%]     (80% Done with protein)On protein 13/279 [5%]     (80

On protein 14/279 [5%]     (19% Done with protein)On protein 14/279 [5%]     (20% Done with protein)On protein 14/279 [5%]     (20% Done with protein)On protein 14/279 [5%]     (20% Done with protein)On protein 14/279 [5%]     (20% Done with protein)On protein 14/279 [5%]     (20% Done with protein)On protein 14/279 [5%]     (20% Done with protein)On protein 14/279 [5%]     (20% Done with protein)On protein 14/279 [5%]     (20% Done with protein)On protein 14/279 [5%]     (20% Done with protein)On protein 14/279 [5%]     (21% Done with protein)On protein 14/279 [5%]     (21% Done with protein)On protein 14/279 [5%]     (21% Done with protein)On protein 14/279 [5%]     (21% Done with protein)On protein 14/279 [5%]     (21% Done with protein)On protein 14/279 [5%]     (21% Done with protein)On protein 14/279 [5%]     (21% Done with protein)On protein 14/279 [5%]     (21% Done with protein)On protein 14/279 [5%]     (21% Done with protein)On protein 14/279 [5%]     (22

On protein 14/279 [5%]     (43% Done with protein)On protein 14/279 [5%]     (43% Done with protein)On protein 14/279 [5%]     (44% Done with protein)On protein 14/279 [5%]     (44% Done with protein)On protein 14/279 [5%]     (44% Done with protein)On protein 14/279 [5%]     (44% Done with protein)On protein 14/279 [5%]     (44% Done with protein)On protein 14/279 [5%]     (44% Done with protein)On protein 14/279 [5%]     (44% Done with protein)On protein 14/279 [5%]     (44% Done with protein)On protein 14/279 [5%]     (44% Done with protein)On protein 14/279 [5%]     (45% Done with protein)On protein 14/279 [5%]     (45% Done with protein)On protein 14/279 [5%]     (45% Done with protein)On protein 14/279 [5%]     (45% Done with protein)On protein 14/279 [5%]     (45% Done with protein)On protein 14/279 [5%]     (45% Done with protein)On protein 14/279 [5%]     (45% Done with protein)On protein 14/279 [5%]     (45% Done with protein)On protein 14/279 [5%]     (45

On protein 14/279 [5%]     (69% Done with protein)On protein 14/279 [5%]     (70% Done with protein)On protein 14/279 [5%]     (70% Done with protein)On protein 14/279 [5%]     (70% Done with protein)On protein 14/279 [5%]     (70% Done with protein)On protein 14/279 [5%]     (70% Done with protein)On protein 14/279 [5%]     (70% Done with protein)On protein 14/279 [5%]     (70% Done with protein)On protein 14/279 [5%]     (70% Done with protein)On protein 14/279 [5%]     (70% Done with protein)On protein 14/279 [5%]     (70% Done with protein)On protein 14/279 [5%]     (71% Done with protein)On protein 14/279 [5%]     (71% Done with protein)On protein 14/279 [5%]     (71% Done with protein)On protein 14/279 [5%]     (71% Done with protein)On protein 14/279 [5%]     (71% Done with protein)On protein 14/279 [5%]     (71% Done with protein)On protein 14/279 [5%]     (71% Done with protein)On protein 14/279 [5%]     (71% Done with protein)On protein 14/279 [5%]     (71

On protein 15/279 [5%]     (8% Done with protein)On protein 15/279 [5%]     (9% Done with protein)On protein 15/279 [5%]     (10% Done with protein)On protein 15/279 [5%]     (11% Done with protein)On protein 15/279 [5%]     (11% Done with protein)On protein 15/279 [5%]     (12% Done with protein)On protein 15/279 [5%]     (13% Done with protein)On protein 15/279 [5%]     (14% Done with protein)On protein 15/279 [5%]     (15% Done with protein)On protein 15/279 [5%]     (16% Done with protein)On protein 15/279 [5%]     (16% Done with protein)On protein 15/279 [5%]     (17% Done with protein)On protein 15/279 [5%]     (18% Done with protein)On protein 15/279 [5%]     (19% Done with protein)On protein 15/279 [5%]     (20% Done with protein)On protein 15/279 [5%]     (21% Done with protein)On protein 15/279 [5%]     (22% Done with protein)On protein 15/279 [5%]     (22% Done with protein)On protein 15/279 [5%]     (23% Done with protein)On protein 15/279 [5%]     (24% 

On protein 17/279 [6%]     (43% Done with protein)On protein 17/279 [6%]     (43% Done with protein)On protein 17/279 [6%]     (44% Done with protein)On protein 17/279 [6%]     (44% Done with protein)On protein 17/279 [6%]     (45% Done with protein)On protein 17/279 [6%]     (45% Done with protein)On protein 17/279 [6%]     (45% Done with protein)On protein 17/279 [6%]     (46% Done with protein)On protein 17/279 [6%]     (46% Done with protein)On protein 17/279 [6%]     (47% Done with protein)On protein 17/279 [6%]     (47% Done with protein)On protein 17/279 [6%]     (48% Done with protein)On protein 17/279 [6%]     (48% Done with protein)On protein 17/279 [6%]     (49% Done with protein)On protein 17/279 [6%]     (49% Done with protein)On protein 17/279 [6%]     (50% Done with protein)On protein 17/279 [6%]     (50% Done with protein)On protein 17/279 [6%]     (50% Done with protein)On protein 17/279 [6%]     (51% Done with protein)On protein 17/279 [6%]     (51

On protein 19/279 [7%]     (0% Done with protein)On protein 19/279 [7%]     (0% Done with protein)On protein 19/279 [7%]     (1% Done with protein)On protein 19/279 [7%]     (1% Done with protein)On protein 19/279 [7%]     (1% Done with protein)On protein 19/279 [7%]     (2% Done with protein)On protein 19/279 [7%]     (2% Done with protein)On protein 19/279 [7%]     (2% Done with protein)On protein 19/279 [7%]     (3% Done with protein)On protein 19/279 [7%]     (3% Done with protein)On protein 19/279 [7%]     (4% Done with protein)On protein 19/279 [7%]     (4% Done with protein)On protein 19/279 [7%]     (4% Done with protein)On protein 19/279 [7%]     (5% Done with protein)On protein 19/279 [7%]     (5% Done with protein)On protein 19/279 [7%]     (5% Done with protein)On protein 19/279 [7%]     (6% Done with protein)On protein 19/279 [7%]     (6% Done with protein)On protein 19/279 [7%]     (6% Done with protein)On protein 19/279 [7%]     (7% Done with protein)

On protein 19/279 [7%]     (84% Done with protein)On protein 19/279 [7%]     (84% Done with protein)On protein 19/279 [7%]     (85% Done with protein)On protein 19/279 [7%]     (85% Done with protein)On protein 19/279 [7%]     (85% Done with protein)On protein 19/279 [7%]     (86% Done with protein)On protein 19/279 [7%]     (86% Done with protein)On protein 19/279 [7%]     (86% Done with protein)On protein 19/279 [7%]     (87% Done with protein)On protein 19/279 [7%]     (87% Done with protein)On protein 19/279 [7%]     (88% Done with protein)On protein 19/279 [7%]     (88% Done with protein)On protein 19/279 [7%]     (88% Done with protein)On protein 19/279 [7%]     (89% Done with protein)On protein 19/279 [7%]     (89% Done with protein)On protein 19/279 [7%]     (89% Done with protein)On protein 19/279 [7%]     (90% Done with protein)On protein 19/279 [7%]     (90% Done with protein)On protein 19/279 [7%]     (90% Done with protein)On protein 19/279 [7%]     (91

On protein 23/279 [8%]     (9% Done with protein)On protein 23/279 [8%]     (9% Done with protein)On protein 23/279 [8%]     (10% Done with protein)On protein 23/279 [8%]     (10% Done with protein)On protein 23/279 [8%]     (10% Done with protein)On protein 23/279 [8%]     (11% Done with protein)On protein 23/279 [8%]     (11% Done with protein)On protein 23/279 [8%]     (11% Done with protein)On protein 23/279 [8%]     (11% Done with protein)On protein 23/279 [8%]     (12% Done with protein)On protein 23/279 [8%]     (12% Done with protein)On protein 23/279 [8%]     (12% Done with protein)On protein 23/279 [8%]     (13% Done with protein)On protein 23/279 [8%]     (13% Done with protein)On protein 23/279 [8%]     (13% Done with protein)On protein 23/279 [8%]     (14% Done with protein)On protein 23/279 [8%]     (14% Done with protein)On protein 23/279 [8%]     (14% Done with protein)On protein 23/279 [8%]     (14% Done with protein)On protein 23/279 [8%]     (15% 

On protein 24/279 [8%]     (34% Done with protein)On protein 24/279 [8%]     (35% Done with protein)On protein 24/279 [8%]     (35% Done with protein)On protein 24/279 [8%]     (36% Done with protein)On protein 24/279 [8%]     (36% Done with protein)On protein 24/279 [8%]     (36% Done with protein)On protein 24/279 [8%]     (37% Done with protein)On protein 24/279 [8%]     (37% Done with protein)On protein 24/279 [8%]     (38% Done with protein)On protein 24/279 [8%]     (38% Done with protein)On protein 24/279 [8%]     (38% Done with protein)On protein 24/279 [8%]     (39% Done with protein)On protein 24/279 [8%]     (39% Done with protein)On protein 24/279 [8%]     (40% Done with protein)On protein 24/279 [8%]     (40% Done with protein)On protein 24/279 [8%]     (40% Done with protein)On protein 24/279 [8%]     (41% Done with protein)On protein 24/279 [8%]     (41% Done with protein)On protein 24/279 [8%]     (42% Done with protein)On protein 24/279 [8%]     (42

On protein 25/279 [9%]     (22% Done with protein)On protein 25/279 [9%]     (23% Done with protein)On protein 25/279 [9%]     (23% Done with protein)On protein 25/279 [9%]     (23% Done with protein)On protein 25/279 [9%]     (23% Done with protein)On protein 25/279 [9%]     (23% Done with protein)On protein 25/279 [9%]     (23% Done with protein)On protein 25/279 [9%]     (24% Done with protein)On protein 25/279 [9%]     (24% Done with protein)On protein 25/279 [9%]     (24% Done with protein)On protein 25/279 [9%]     (24% Done with protein)On protein 25/279 [9%]     (24% Done with protein)On protein 25/279 [9%]     (24% Done with protein)On protein 25/279 [9%]     (25% Done with protein)On protein 25/279 [9%]     (25% Done with protein)On protein 25/279 [9%]     (25% Done with protein)On protein 25/279 [9%]     (25% Done with protein)On protein 25/279 [9%]     (25% Done with protein)On protein 25/279 [9%]     (25% Done with protein)On protein 25/279 [9%]     (26

On protein 25/279 [9%]     (58% Done with protein)On protein 25/279 [9%]     (59% Done with protein)On protein 25/279 [9%]     (59% Done with protein)On protein 25/279 [9%]     (59% Done with protein)On protein 25/279 [9%]     (59% Done with protein)On protein 25/279 [9%]     (59% Done with protein)On protein 25/279 [9%]     (59% Done with protein)On protein 25/279 [9%]     (60% Done with protein)On protein 25/279 [9%]     (60% Done with protein)On protein 25/279 [9%]     (60% Done with protein)On protein 25/279 [9%]     (60% Done with protein)On protein 25/279 [9%]     (60% Done with protein)On protein 25/279 [9%]     (60% Done with protein)On protein 25/279 [9%]     (61% Done with protein)On protein 25/279 [9%]     (61% Done with protein)On protein 25/279 [9%]     (61% Done with protein)On protein 25/279 [9%]     (61% Done with protein)On protein 25/279 [9%]     (61% Done with protein)On protein 25/279 [9%]     (61% Done with protein)On protein 25/279 [9%]     (62

On protein 26/279 [9%]     (23% Done with protein)On protein 26/279 [9%]     (24% Done with protein)On protein 26/279 [9%]     (24% Done with protein)On protein 26/279 [9%]     (25% Done with protein)On protein 26/279 [9%]     (25% Done with protein)On protein 26/279 [9%]     (25% Done with protein)On protein 26/279 [9%]     (26% Done with protein)On protein 26/279 [9%]     (26% Done with protein)On protein 26/279 [9%]     (27% Done with protein)On protein 26/279 [9%]     (27% Done with protein)On protein 26/279 [9%]     (28% Done with protein)On protein 26/279 [9%]     (28% Done with protein)On protein 26/279 [9%]     (29% Done with protein)On protein 26/279 [9%]     (29% Done with protein)On protein 26/279 [9%]     (29% Done with protein)On protein 26/279 [9%]     (30% Done with protein)On protein 26/279 [9%]     (30% Done with protein)On protein 26/279 [9%]     (31% Done with protein)On protein 26/279 [9%]     (31% Done with protein)On protein 26/279 [9%]     (32

On protein 27/279 [10%]     (31% Done with protein)On protein 27/279 [10%]     (31% Done with protein)On protein 27/279 [10%]     (32% Done with protein)On protein 27/279 [10%]     (32% Done with protein)On protein 27/279 [10%]     (32% Done with protein)On protein 27/279 [10%]     (32% Done with protein)On protein 27/279 [10%]     (33% Done with protein)On protein 27/279 [10%]     (33% Done with protein)On protein 27/279 [10%]     (33% Done with protein)On protein 27/279 [10%]     (33% Done with protein)On protein 27/279 [10%]     (33% Done with protein)On protein 27/279 [10%]     (34% Done with protein)On protein 27/279 [10%]     (34% Done with protein)On protein 27/279 [10%]     (34% Done with protein)On protein 27/279 [10%]     (34% Done with protein)On protein 27/279 [10%]     (35% Done with protein)On protein 27/279 [10%]     (35% Done with protein)On protein 27/279 [10%]     (35% Done with protein)On protein 27/279 [10%]     (35% Done with protein)On protein 

On protein 278/279 [100%]     (100% Done with protein)

In [13]:
for spec_count in top_b:
    print(f'Spectrum {spec_count}')
    for i in range(top_x):
        b_prot = prots[top_b[spec_count][i][1]]
        b_start_idx = top_b[spec_count][i][2]
        b_score = top_b[spec_count][i][0]
        
        y_prot = prots[top_y[spec_count][i][1]]
        y_start_idx = top_y[spec_count][i][2]
        y_score = top_y[spec_count][i][0]
        
        by_prot = prots[top_by
                        [spec_count]
                        [i]
                        [1]]
        by_start_idx = top_by[spec_count][i][2]
        by_score = top_by[spec_count][i][0]
        
        print(f'''\t{i}. b: {b_prot[b_start_idx:b_start_idx+avg_len]}: {b_score} \t y: {y_prot[y_start_idx-avg_len:y_start_idx+1]}: {y_score} \t both: {by_prot[by_start_idx:by_start_idx+avg_len]}: {by_score}''')
        

Spectrum 0
	0. b: WASLDAE: 6.0 	 y: KAEDMDGR: 6.0 	 both: WYNHIKS: 8.0
	1. b: WNRSLDT: 6.0 	 y: MRDRLIQF: 6.0 	 both: YTDEHGE: 7.0
	2. b: KKHAHLT: 6.0 	 y: ETNMYEGV: 6.0 	 both: EGPLLEK: 7.0
	3. b: ASVQAIN: 6.0 	 y: MDVSAVED: 6.0 	 both: LGSPGLP: 7.0
	4. b: VSAAASN: 5.0 	 y: NSKVAGIR: 5.0 	 both: WRNVPEK: 7.0
	5. b: KQFPVTR: 5.0 	 y: SWSLEHKS: 5.0 	 both: WNRSLDT: 7.0
	6. b: EKAQSEL: 5.0 	 y: AEIQIRIE: 5.0 	 both: LGGPIIT: 7.0
	7. b: GGSVDSV: 5.0 	 y: KAAGLCEQ: 5.0 	 both: FLLCLAG: 7.0
	8. b: KSTSPTL: 5.0 	 y: TAESSVSS: 5.0 	 both: KSSVAVP: 7.0
	9. b: KTKVEAF: 5.0 	 y: ERGDAVAK: 5.0 	 both: WAKARTV: 7.0
Spectrum 1
	0. b: ASVPAQS: 6.0 	 y: QHRQALIA: 6.0 	 both: ASVCMQV: 8.0
	1. b: ASVPAGG: 6.0 	 y: SAAPAAGS: 6.0 	 both: ASVDLEL: 8.0
	2. b: ASLGPIE: 6.0 	 y: REYERDWY: 6.0 	 both: AEGLYHF: 7.0
	3. b: VASDAKS: 5.0 	 y: NNLCLHFN: 5.0 	 both: SVASNEP: 7.0
	4. b: ASVLGLQ: 5.0 	 y: TDSFFKVL: 5.0 	 both: AVSDLQE: 7.0
	5. b: ADALEEE: 5.0 	 y: NFEEALAA: 5.0 	 both: GLSDGEW: 7.0
	6. b: SVAYKNV: 5.

Hybrid: 0
```
DLPVNSPMTKG
```

Non hybrid: 1
``` 
DAAIVGYK
```

Non hybrid: 2
```
DEAPNFEANTTIGRIRFH
```

Non hybrid: 3
``` 
DPQVEQLEL
```

Hybrid 1: 4
```
DLQTLALWSRM
```

Non hybrid: 5
```
DLTEYLSRFGEVV
```

Hybrid 2: 6
```
DLQTLEVE
```

Non Hybrid: 7
```
DLSSASAIMGNAKVKAHGKKVITAFN
```

Non hybrid: 8
```
EEILKAFKLF
```