# Pre-processing spectra to make them easier to search

Idea: make one big spectrum whose masses we will then tag with amino acid sequences

Flow: 
1. Load all spectra
2. Make a single large spectrum called `S`
3. Find the maximum possible peptide length, `max_length`, from the maximum mass
4. Load the database
5. For each protein `P` of the database
    1. For each kmer `k` of length `max_length`
        1. Calculate each individual spectrum for `(b+, b++, y+, y++)` (we call each one `ts`)
        2. For each `ts`:
            1. For each mass `m` of this `ts`:
                1. Binary search `S` for `m` plus/minus tolerance
                2. If the mass is found, add `k(m)` to a dictionary, split by ion type, to keep for later
6. Build a MassDawg for both `b` and `y` kmers taken from the search
7. For each input spectrum:
    1. Search both the `b` and `y` MassDawgs for sequences 
    2. Make an alignment
                

In [1]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
module_path = os.path.abspath(os.path.join('../..'))
if module_path not in sys.path:
    sys.path.append(module_path)
    
from pyteomics import fasta
from collections import defaultdict
from src.sequence.gen_spectra import gen_spectrum, gen_min_ordering
from src.file_io import mzML
from src.objects import Spectrum, Database
from src.utils import ppm_to_da
from src.scoring import scoring
from src.tree import Tree
from src.identfication import alignment

from src.database import extract_protein_name

import bisect

from mass_dawg import PyMassDawg

import numpy as np

# Mass tolerance in parts per million; converted to daltons with ppm_to_da
# when theoretical masses are compared against observed peaks.
ppm_tol = 20

In [2]:
def longest_array(window, length):
    """Return how many bins of width ``window`` a sparse spectrum array needs.

    The size is ``ceil((1 / window) * length * 186.079313)``.

    NOTE(review): 186.079313 looks like the monoisotopic residue mass of
    tryptophan, i.e. a per-residue upper bound -- confirm.

    Args:
        window: width of a single bin; must be non-zero.
        length: number of residues the array has to accommodate.

    Returns:
        The bin count as an ``int``.
    """
    bins_per_mass_unit = 1/window
    widest_span = bins_per_mass_unit * length * 186.079313
    return int(np.ceil(widest_span))

def sparse_it(spectrum, window, length):
    """Convert a list of peaks into a 0/1 occupancy vector.

    The vector has ``longest_array(window, length)`` entries (NumPy's default
    float dtype). The entry at index ``int(mz / window)`` is set to 1 for
    every value ``mz`` in ``spectrum``; all other entries stay 0.

    Args:
        spectrum: iterable of numeric peak values.
        window: bin width used to turn a peak value into an index.
        length: residue count forwarded to ``longest_array`` to size the vector.

    Returns:
        The populated ``numpy.ndarray``.

    Raises:
        IndexError: if a peak maps to an index beyond the end of the vector.
    """
    bins = np.zeros(longest_array(window, length))
    occupied_slots = (int(peak/window) for peak in spectrum)
    for slot in occupied_slots:
        bins[slot] = 1

    return bins

## 1. Load all spectra

In [3]:
# Input files: the mzML file holding the observed spectra and the FASTA
# protein database that is searched against them.
# NOTE(review): absolute, machine-specific paths -- adjust before running elsewhere.
spectra_file = '/Users/zacharymcgrath/Desktop/nod2 data/single/singleRealSpectrum.mzml'
fasta_file = '/Users/zacharymcgrath/Desktop/nod2 data/filteredNOD2.fasta'

In [4]:
# Read the spectra from the mzML file.
# NOTE(review): peak_filter=25 presumably limits how many peaks are kept per
# spectrum -- confirm against src.file_io.mzML.read.
spectra = mzML.read(spectra_file, peak_filter=25)

## 2. Make the single large spectrum

In [5]:
# Pool the peaks of every loaded spectrum into one ascending list so that it
# can be binary-searched later on.
all_spectra = sorted(peak for scan in spectra for peak in scan.spectrum)

## 3. Find the maximum possible peptide length

In [6]:
# Upper bound on peptide length: the largest pooled peak (all_spectra is
# sorted ascending, so it is the last element) divided by 57.021464, rounded up.
# NOTE(review): 57.021464 looks like the monoisotopic residue mass of glycine,
# the lightest residue -- confirm. An empty all_spectra raises IndexError here.
max_len = int(np.ceil(all_spectra[-1]/ 57.021464))

## 4. Load the database into RAM

In [7]:
# Build a lookup of protein name -> FASTA entry for every record in the
# database file. An entry whose extracted name repeats an earlier one
# overwrites it. The enumerate index `i` is not used inside the loop.
prots = {}
for i, entry in enumerate(fasta.read(fasta_file)):
    name = extract_protein_name(entry)
    prots[name] = entry

## 5. The search

In [8]:
%%time
def find_kmer_hits(kmer: str, prot_name: str) -> None:
    """Record each b/y fragment of ``kmer`` whose mass matches an observed peak.

    For both ion types ('b', 'y') and both charges (1, 2) the theoretical
    spectrum of ``kmer`` is generated with ``gen_spectrum``. Every theoretical
    mass is looked up in the module-level sorted peak list ``all_spectra``
    with a tolerance of ``ppm_tol`` parts per million. On a match the
    corresponding subsequence becomes a key of ``b_hits`` (prefix of ``kmer``)
    or ``y_hits`` (suffix of ``kmer``) and is inserted into the tree ``t``
    under ``prot_name``.

    Assumes the c-th mass returned by ``gen_spectrum`` belongs to the fragment
    of length ``c + 1`` -- TODO confirm against ``gen_spectrum``.

    Args:
        kmer: amino acid sequence to fragment.
        prot_name: name of the protein ``kmer`` was taken from.

    Returns:
        None. Results are written to the module-level ``b_hits``, ``y_hits``
        and ``t``.
    """
    n_peaks = len(all_spectra)

    for ion in 'by':
        for charge in [1, 2]:
            spec = gen_spectrum(kmer, ion=ion, charge=charge)['spectrum']

            for c, mass in enumerate(spec):

                da_tol = ppm_to_da(mass, ppm_tol)
                lb = mass - da_tol
                ub = mass + da_tol

                # index of the first observed peak that is >= lb
                beginning_entry = bisect.bisect_left(all_spectra, lb)

                # No peak at or above lb, or the first such peak is already
                # past ub: nothing within tolerance. The bound is
                # `>= n_peaks` (not `+ 1`) so that the last, largest peak in
                # all_spectra can still be matched.
                if beginning_entry >= n_peaks or all_spectra[beginning_entry] > ub:
                    continue

                if ion == 'b':
                    hit = kmer[:c+1]
                    b_hits[hit] = None
                else:
                    hit = kmer[-c-1:]
                    y_hits[hit] = None
                t.insert(prot_name, hit)

# Shared state filled in by find_kmer_hits. The dicts are used as sets of
# matched subsequences (every value is None), one dict per ion type.
b_hits = {}
y_hits = {}
plen = len(prots)

# Tree that find_kmer_hits populates through t.insert(protein name, subsequence)
t = Tree()

for i, (name, prot_entry) in enumerate(prots.items()):
    
    
    # progress indicator; '\r' with end='' rewrites the same terminal line
    print(f'\rOn protein {i+1}/{plen} [{int((i+1) * 100 / plen)}%]', end='')
    
    # prefixes of the protein with lengths 1 .. max_len - 1
    for j in range(1, max_len):
        kmer = prot_entry.sequence[:j]
        find_kmer_hits(kmer, name)
    
    # full-length windows of max_len residues starting at 0 .. len - max_len - 1
    for j in range(len(prot_entry.sequence) - max_len):
        kmer = prot_entry.sequence[j:j+max_len]
        find_kmer_hits(kmer, name)     
        
    # suffixes of the protein, from max_len residues down to 1
    # NOTE(review): when the protein is shorter than max_len this range starts
    # at a negative j, so sequence[j:] counts from the end and every suffix is
    # searched twice; the prefix loop above likewise repeats the full sequence.
    for j in range(len(prot_entry.sequence) - max_len, len(prot_entry.sequence)):
        kmer = prot_entry.sequence[j:]
        find_kmer_hits(kmer, name)
                    

On protein 279/279 [100%]CPU times: user 17.3 s, sys: 201 ms, total: 17.5 s
Wall time: 17.6 s


## 6. Build the MassDawgs for the different ions

In [9]:
# One MassDawg per ion type: b_dog receives the b_hits kmers and y_dog the
# y_hits kmers.
b_dog = PyMassDawg()
y_dog = PyMassDawg()

In [10]:
# Load every b-ion hit into the b MassDawg. The kmers are visited in the
# order given by gen_min_ordering, and the DAWG is finalized once all of
# them have been inserted.
blen = len(b_hits)
b_kmers_in_order = sorted(b_hits, key=gen_min_ordering)
for i, kmer in enumerate(b_kmers_in_order):
    print(f'\rOne kmer {i}/{blen} [{int(100 * (i+1)/blen)}%]', end='')
    # charge-1 and charge-2 b spectra of this kmer, generated in that order
    bs, bd = (gen_spectrum(kmer, ion='b', charge=z)['spectrum'] for z in (1, 2))
    b_dog.insert(bs, bd, kmer)
b_dog.finish()

One kmer 0/28587 [0%]One kmer 1/28587 [0%]One kmer 2/28587 [0%]One kmer 3/28587 [0%]One kmer 4/28587 [0%]One kmer 5/28587 [0%]One kmer 6/28587 [0%]One kmer 7/28587 [0%]One kmer 8/28587 [0%]One kmer 9/28587 [0%]One kmer 10/28587 [0%]One kmer 11/28587 [0%]One kmer 12/28587 [0%]One kmer 13/28587 [0%]One kmer 14/28587 [0%]One kmer 15/28587 [0%]One kmer 16/28587 [0%]One kmer 17/28587 [0%]One kmer 18/28587 [0%]One kmer 19/28587 [0%]One kmer 20/28587 [0%]One kmer 21/28587 [0%]One kmer 22/28587 [0%]One kmer 23/28587 [0%]One kmer 24/28587 [0%]One kmer 25/28587 [0%]One kmer 26/28587 [0%]One kmer 27/28587 [0%]One kmer 28/28587 [0%]One kmer 29/28587 [0%]One kmer 30/28587 [0%]One kmer 31/28587 [0%]One kmer 32/28587 [0%]One kmer 33/28587 [0%]One kmer 34/28587 [0%]One kmer 35/28587 [0%]One kmer 36/28587 [0%]One kmer 37/28587 [0%]One kmer 38/28587 [0%]One kmer 39/28587 [0%]One kmer 40/28587 [0%]One kmer 41/28587 [0%]One kmer 42/28587 [0%]One kmer 43/28587 [0

One kmer 1628/28587 [5%]One kmer 1629/28587 [5%]One kmer 1630/28587 [5%]One kmer 1631/28587 [5%]One kmer 1632/28587 [5%]One kmer 1633/28587 [5%]One kmer 1634/28587 [5%]One kmer 1635/28587 [5%]One kmer 1636/28587 [5%]One kmer 1637/28587 [5%]One kmer 1638/28587 [5%]One kmer 1639/28587 [5%]One kmer 1640/28587 [5%]One kmer 1641/28587 [5%]One kmer 1642/28587 [5%]One kmer 1643/28587 [5%]One kmer 1644/28587 [5%]One kmer 1645/28587 [5%]One kmer 1646/28587 [5%]One kmer 1647/28587 [5%]One kmer 1648/28587 [5%]One kmer 1649/28587 [5%]One kmer 1650/28587 [5%]One kmer 1651/28587 [5%]One kmer 1652/28587 [5%]One kmer 1653/28587 [5%]One kmer 1654/28587 [5%]One kmer 1655/28587 [5%]One kmer 1656/28587 [5%]One kmer 1657/28587 [5%]One kmer 1658/28587 [5%]One kmer 1659/28587 [5%]One kmer 1660/28587 [5%]One kmer 1661/28587 [5%]One kmer 1662/28587 [5%]One kmer 1663/28587 [5%]One kmer 1664/28587 [5%]One kmer 1665/28587 [5%]One kmer 1666/28587 [5%]One kmer 1667/28587 [5%]

One kmer 3008/28587 [10%]One kmer 3009/28587 [10%]One kmer 3010/28587 [10%]One kmer 3011/28587 [10%]One kmer 3012/28587 [10%]One kmer 3013/28587 [10%]One kmer 3014/28587 [10%]One kmer 3015/28587 [10%]One kmer 3016/28587 [10%]One kmer 3017/28587 [10%]One kmer 3018/28587 [10%]One kmer 3019/28587 [10%]One kmer 3020/28587 [10%]One kmer 3021/28587 [10%]One kmer 3022/28587 [10%]One kmer 3023/28587 [10%]One kmer 3024/28587 [10%]One kmer 3025/28587 [10%]One kmer 3026/28587 [10%]One kmer 3027/28587 [10%]One kmer 3028/28587 [10%]One kmer 3029/28587 [10%]One kmer 3030/28587 [10%]One kmer 3031/28587 [10%]One kmer 3032/28587 [10%]One kmer 3033/28587 [10%]One kmer 3034/28587 [10%]One kmer 3035/28587 [10%]One kmer 3036/28587 [10%]One kmer 3037/28587 [10%]One kmer 3038/28587 [10%]One kmer 3039/28587 [10%]One kmer 3040/28587 [10%]One kmer 3041/28587 [10%]One kmer 3042/28587 [10%]One kmer 3043/28587 [10%]One kmer 3044/28587 [10%]One kmer 3045/28587 [10%]One kmer 30

One kmer 4127/28587 [14%]One kmer 4128/28587 [14%]One kmer 4129/28587 [14%]One kmer 4130/28587 [14%]One kmer 4131/28587 [14%]One kmer 4132/28587 [14%]One kmer 4133/28587 [14%]One kmer 4134/28587 [14%]One kmer 4135/28587 [14%]One kmer 4136/28587 [14%]One kmer 4137/28587 [14%]One kmer 4138/28587 [14%]One kmer 4139/28587 [14%]One kmer 4140/28587 [14%]One kmer 4141/28587 [14%]One kmer 4142/28587 [14%]One kmer 4143/28587 [14%]One kmer 4144/28587 [14%]One kmer 4145/28587 [14%]One kmer 4146/28587 [14%]One kmer 4147/28587 [14%]One kmer 4148/28587 [14%]One kmer 4149/28587 [14%]One kmer 4150/28587 [14%]One kmer 4151/28587 [14%]One kmer 4152/28587 [14%]One kmer 4153/28587 [14%]One kmer 4154/28587 [14%]One kmer 4155/28587 [14%]One kmer 4156/28587 [14%]One kmer 4157/28587 [14%]One kmer 4158/28587 [14%]One kmer 4159/28587 [14%]One kmer 4160/28587 [14%]One kmer 4161/28587 [14%]One kmer 4162/28587 [14%]One kmer 4163/28587 [14%]One kmer 4164/28587 [14%]One kmer 41

One kmer 5453/28587 [19%]One kmer 5454/28587 [19%]One kmer 5455/28587 [19%]One kmer 5456/28587 [19%]One kmer 5457/28587 [19%]One kmer 5458/28587 [19%]One kmer 5459/28587 [19%]One kmer 5460/28587 [19%]One kmer 5461/28587 [19%]One kmer 5462/28587 [19%]One kmer 5463/28587 [19%]One kmer 5464/28587 [19%]One kmer 5465/28587 [19%]One kmer 5466/28587 [19%]One kmer 5467/28587 [19%]One kmer 5468/28587 [19%]One kmer 5469/28587 [19%]One kmer 5470/28587 [19%]One kmer 5471/28587 [19%]One kmer 5472/28587 [19%]One kmer 5473/28587 [19%]One kmer 5474/28587 [19%]One kmer 5475/28587 [19%]One kmer 5476/28587 [19%]One kmer 5477/28587 [19%]One kmer 5478/28587 [19%]One kmer 5479/28587 [19%]One kmer 5480/28587 [19%]One kmer 5481/28587 [19%]One kmer 5482/28587 [19%]One kmer 5483/28587 [19%]One kmer 5484/28587 [19%]One kmer 5485/28587 [19%]One kmer 5486/28587 [19%]One kmer 5487/28587 [19%]One kmer 5488/28587 [19%]One kmer 5489/28587 [19%]One kmer 5490/28587 [19%]One kmer 54

One kmer 6626/28587 [23%]One kmer 6627/28587 [23%]One kmer 6628/28587 [23%]One kmer 6629/28587 [23%]One kmer 6630/28587 [23%]One kmer 6631/28587 [23%]One kmer 6632/28587 [23%]One kmer 6633/28587 [23%]One kmer 6634/28587 [23%]One kmer 6635/28587 [23%]One kmer 6636/28587 [23%]One kmer 6637/28587 [23%]One kmer 6638/28587 [23%]One kmer 6639/28587 [23%]One kmer 6640/28587 [23%]One kmer 6641/28587 [23%]One kmer 6642/28587 [23%]One kmer 6643/28587 [23%]One kmer 6644/28587 [23%]One kmer 6645/28587 [23%]One kmer 6646/28587 [23%]One kmer 6647/28587 [23%]One kmer 6648/28587 [23%]One kmer 6649/28587 [23%]One kmer 6650/28587 [23%]One kmer 6651/28587 [23%]One kmer 6652/28587 [23%]One kmer 6653/28587 [23%]One kmer 6654/28587 [23%]One kmer 6655/28587 [23%]One kmer 6656/28587 [23%]One kmer 6657/28587 [23%]One kmer 6658/28587 [23%]One kmer 6659/28587 [23%]One kmer 6660/28587 [23%]One kmer 6661/28587 [23%]One kmer 6662/28587 [23%]One kmer 6663/28587 [23%]One kmer 66

One kmer 8126/28587 [28%]One kmer 8127/28587 [28%]One kmer 8128/28587 [28%]One kmer 8129/28587 [28%]One kmer 8130/28587 [28%]One kmer 8131/28587 [28%]One kmer 8132/28587 [28%]One kmer 8133/28587 [28%]One kmer 8134/28587 [28%]One kmer 8135/28587 [28%]One kmer 8136/28587 [28%]One kmer 8137/28587 [28%]One kmer 8138/28587 [28%]One kmer 8139/28587 [28%]One kmer 8140/28587 [28%]One kmer 8141/28587 [28%]One kmer 8142/28587 [28%]One kmer 8143/28587 [28%]One kmer 8144/28587 [28%]One kmer 8145/28587 [28%]One kmer 8146/28587 [28%]One kmer 8147/28587 [28%]One kmer 8148/28587 [28%]One kmer 8149/28587 [28%]One kmer 8150/28587 [28%]One kmer 8151/28587 [28%]One kmer 8152/28587 [28%]One kmer 8153/28587 [28%]One kmer 8154/28587 [28%]One kmer 8155/28587 [28%]One kmer 8156/28587 [28%]One kmer 8157/28587 [28%]One kmer 8158/28587 [28%]One kmer 8159/28587 [28%]One kmer 8160/28587 [28%]One kmer 8161/28587 [28%]One kmer 8162/28587 [28%]One kmer 8163/28587 [28%]One kmer 81

One kmer 9424/28587 [32%]One kmer 9425/28587 [32%]One kmer 9426/28587 [32%]One kmer 9427/28587 [32%]One kmer 9428/28587 [32%]One kmer 9429/28587 [32%]One kmer 9430/28587 [32%]One kmer 9431/28587 [32%]One kmer 9432/28587 [32%]One kmer 9433/28587 [33%]One kmer 9434/28587 [33%]One kmer 9435/28587 [33%]One kmer 9436/28587 [33%]One kmer 9437/28587 [33%]One kmer 9438/28587 [33%]One kmer 9439/28587 [33%]One kmer 9440/28587 [33%]One kmer 9441/28587 [33%]One kmer 9442/28587 [33%]One kmer 9443/28587 [33%]One kmer 9444/28587 [33%]One kmer 9445/28587 [33%]One kmer 9446/28587 [33%]One kmer 9447/28587 [33%]One kmer 9448/28587 [33%]One kmer 9449/28587 [33%]One kmer 9450/28587 [33%]One kmer 9451/28587 [33%]One kmer 9452/28587 [33%]One kmer 9453/28587 [33%]One kmer 9454/28587 [33%]One kmer 9455/28587 [33%]One kmer 9456/28587 [33%]One kmer 9457/28587 [33%]One kmer 9458/28587 [33%]One kmer 9459/28587 [33%]One kmer 9460/28587 [33%]One kmer 9461/28587 [33%]One kmer 94

One kmer 10625/28587 [37%]One kmer 10626/28587 [37%]One kmer 10627/28587 [37%]One kmer 10628/28587 [37%]One kmer 10629/28587 [37%]One kmer 10630/28587 [37%]One kmer 10631/28587 [37%]One kmer 10632/28587 [37%]One kmer 10633/28587 [37%]One kmer 10634/28587 [37%]One kmer 10635/28587 [37%]One kmer 10636/28587 [37%]One kmer 10637/28587 [37%]One kmer 10638/28587 [37%]One kmer 10639/28587 [37%]One kmer 10640/28587 [37%]One kmer 10641/28587 [37%]One kmer 10642/28587 [37%]One kmer 10643/28587 [37%]One kmer 10644/28587 [37%]One kmer 10645/28587 [37%]One kmer 10646/28587 [37%]One kmer 10647/28587 [37%]One kmer 10648/28587 [37%]One kmer 10649/28587 [37%]One kmer 10650/28587 [37%]One kmer 10651/28587 [37%]One kmer 10652/28587 [37%]One kmer 10653/28587 [37%]One kmer 10654/28587 [37%]One kmer 10655/28587 [37%]One kmer 10656/28587 [37%]One kmer 10657/28587 [37%]One kmer 10658/28587 [37%]One kmer 10659/28587 [37%]One kmer 10660/28587 [37%]One kmer 10661/28587 [37%]

One kmer 12013/28587 [42%]One kmer 12014/28587 [42%]One kmer 12015/28587 [42%]One kmer 12016/28587 [42%]One kmer 12017/28587 [42%]One kmer 12018/28587 [42%]One kmer 12019/28587 [42%]One kmer 12020/28587 [42%]One kmer 12021/28587 [42%]One kmer 12022/28587 [42%]One kmer 12023/28587 [42%]One kmer 12024/28587 [42%]One kmer 12025/28587 [42%]One kmer 12026/28587 [42%]One kmer 12027/28587 [42%]One kmer 12028/28587 [42%]One kmer 12029/28587 [42%]One kmer 12030/28587 [42%]One kmer 12031/28587 [42%]One kmer 12032/28587 [42%]One kmer 12033/28587 [42%]One kmer 12034/28587 [42%]One kmer 12035/28587 [42%]One kmer 12036/28587 [42%]One kmer 12037/28587 [42%]One kmer 12038/28587 [42%]One kmer 12039/28587 [42%]One kmer 12040/28587 [42%]One kmer 12041/28587 [42%]One kmer 12042/28587 [42%]One kmer 12043/28587 [42%]One kmer 12044/28587 [42%]One kmer 12045/28587 [42%]One kmer 12046/28587 [42%]One kmer 12047/28587 [42%]One kmer 12048/28587 [42%]One kmer 12049/28587 [42%]

One kmer 13124/28587 [45%]One kmer 13125/28587 [45%]One kmer 13126/28587 [45%]One kmer 13127/28587 [45%]One kmer 13128/28587 [45%]One kmer 13129/28587 [45%]One kmer 13130/28587 [45%]One kmer 13131/28587 [45%]One kmer 13132/28587 [45%]One kmer 13133/28587 [45%]One kmer 13134/28587 [45%]One kmer 13135/28587 [45%]One kmer 13136/28587 [45%]One kmer 13137/28587 [45%]One kmer 13138/28587 [45%]One kmer 13139/28587 [45%]One kmer 13140/28587 [45%]One kmer 13141/28587 [45%]One kmer 13142/28587 [45%]One kmer 13143/28587 [45%]One kmer 13144/28587 [45%]One kmer 13145/28587 [45%]One kmer 13146/28587 [45%]One kmer 13147/28587 [45%]One kmer 13148/28587 [45%]One kmer 13149/28587 [45%]One kmer 13150/28587 [46%]One kmer 13151/28587 [46%]One kmer 13152/28587 [46%]One kmer 13153/28587 [46%]One kmer 13154/28587 [46%]One kmer 13155/28587 [46%]One kmer 13156/28587 [46%]One kmer 13157/28587 [46%]One kmer 13158/28587 [46%]One kmer 13159/28587 [46%]One kmer 13160/28587 [46%]

One kmer 14434/28587 [50%]One kmer 14435/28587 [50%]One kmer 14436/28587 [50%]One kmer 14437/28587 [50%]One kmer 14438/28587 [50%]One kmer 14439/28587 [50%]One kmer 14440/28587 [50%]One kmer 14441/28587 [50%]One kmer 14442/28587 [50%]One kmer 14443/28587 [50%]One kmer 14444/28587 [50%]One kmer 14445/28587 [50%]One kmer 14446/28587 [50%]One kmer 14447/28587 [50%]One kmer 14448/28587 [50%]One kmer 14449/28587 [50%]One kmer 14450/28587 [50%]One kmer 14451/28587 [50%]One kmer 14452/28587 [50%]One kmer 14453/28587 [50%]One kmer 14454/28587 [50%]One kmer 14455/28587 [50%]One kmer 14456/28587 [50%]One kmer 14457/28587 [50%]One kmer 14458/28587 [50%]One kmer 14459/28587 [50%]One kmer 14460/28587 [50%]One kmer 14461/28587 [50%]One kmer 14462/28587 [50%]One kmer 14463/28587 [50%]One kmer 14464/28587 [50%]One kmer 14465/28587 [50%]One kmer 14466/28587 [50%]One kmer 14467/28587 [50%]One kmer 14468/28587 [50%]One kmer 14469/28587 [50%]One kmer 14470/28587 [50%]

One kmer 15623/28587 [54%]One kmer 15624/28587 [54%]One kmer 15625/28587 [54%]One kmer 15626/28587 [54%]One kmer 15627/28587 [54%]One kmer 15628/28587 [54%]One kmer 15629/28587 [54%]One kmer 15630/28587 [54%]One kmer 15631/28587 [54%]One kmer 15632/28587 [54%]One kmer 15633/28587 [54%]One kmer 15634/28587 [54%]One kmer 15635/28587 [54%]One kmer 15636/28587 [54%]One kmer 15637/28587 [54%]One kmer 15638/28587 [54%]One kmer 15639/28587 [54%]One kmer 15640/28587 [54%]One kmer 15641/28587 [54%]One kmer 15642/28587 [54%]One kmer 15643/28587 [54%]One kmer 15644/28587 [54%]One kmer 15645/28587 [54%]One kmer 15646/28587 [54%]One kmer 15647/28587 [54%]One kmer 15648/28587 [54%]One kmer 15649/28587 [54%]One kmer 15650/28587 [54%]One kmer 15651/28587 [54%]One kmer 15652/28587 [54%]One kmer 15653/28587 [54%]One kmer 15654/28587 [54%]One kmer 15655/28587 [54%]One kmer 15656/28587 [54%]One kmer 15657/28587 [54%]One kmer 15658/28587 [54%]One kmer 15659/28587 [54%]

One kmer 16853/28587 [58%]One kmer 16854/28587 [58%]One kmer 16855/28587 [58%]One kmer 16856/28587 [58%]One kmer 16857/28587 [58%]One kmer 16858/28587 [58%]One kmer 16859/28587 [58%]One kmer 16860/28587 [58%]One kmer 16861/28587 [58%]One kmer 16862/28587 [58%]One kmer 16863/28587 [58%]One kmer 16864/28587 [58%]One kmer 16865/28587 [58%]One kmer 16866/28587 [59%]One kmer 16867/28587 [59%]One kmer 16868/28587 [59%]One kmer 16869/28587 [59%]One kmer 16870/28587 [59%]One kmer 16871/28587 [59%]One kmer 16872/28587 [59%]One kmer 16873/28587 [59%]One kmer 16874/28587 [59%]One kmer 16875/28587 [59%]One kmer 16876/28587 [59%]One kmer 16877/28587 [59%]One kmer 16878/28587 [59%]One kmer 16879/28587 [59%]One kmer 16880/28587 [59%]One kmer 16881/28587 [59%]One kmer 16882/28587 [59%]One kmer 16883/28587 [59%]One kmer 16884/28587 [59%]One kmer 16885/28587 [59%]One kmer 16886/28587 [59%]One kmer 16887/28587 [59%]One kmer 16888/28587 [59%]One kmer 16889/28587 [59%]

One kmer 18077/28587 [63%]One kmer 18078/28587 [63%]One kmer 18079/28587 [63%]One kmer 18080/28587 [63%]One kmer 18081/28587 [63%]One kmer 18082/28587 [63%]One kmer 18083/28587 [63%]One kmer 18084/28587 [63%]One kmer 18085/28587 [63%]One kmer 18086/28587 [63%]One kmer 18087/28587 [63%]One kmer 18088/28587 [63%]One kmer 18089/28587 [63%]One kmer 18090/28587 [63%]One kmer 18091/28587 [63%]One kmer 18092/28587 [63%]One kmer 18093/28587 [63%]One kmer 18094/28587 [63%]One kmer 18095/28587 [63%]One kmer 18096/28587 [63%]One kmer 18097/28587 [63%]One kmer 18098/28587 [63%]One kmer 18099/28587 [63%]One kmer 18100/28587 [63%]One kmer 18101/28587 [63%]One kmer 18102/28587 [63%]One kmer 18103/28587 [63%]One kmer 18104/28587 [63%]One kmer 18105/28587 [63%]One kmer 18106/28587 [63%]One kmer 18107/28587 [63%]One kmer 18108/28587 [63%]One kmer 18109/28587 [63%]One kmer 18110/28587 [63%]One kmer 18111/28587 [63%]One kmer 18112/28587 [63%]One kmer 18113/28587 [63%]

One kmer 19002/28587 [66%]One kmer 19003/28587 [66%]One kmer 19004/28587 [66%]One kmer 19005/28587 [66%]One kmer 19006/28587 [66%]One kmer 19007/28587 [66%]One kmer 19008/28587 [66%]One kmer 19009/28587 [66%]One kmer 19010/28587 [66%]One kmer 19011/28587 [66%]One kmer 19012/28587 [66%]One kmer 19013/28587 [66%]One kmer 19014/28587 [66%]One kmer 19015/28587 [66%]One kmer 19016/28587 [66%]One kmer 19017/28587 [66%]One kmer 19018/28587 [66%]One kmer 19019/28587 [66%]One kmer 19020/28587 [66%]One kmer 19021/28587 [66%]One kmer 19022/28587 [66%]One kmer 19023/28587 [66%]One kmer 19024/28587 [66%]One kmer 19025/28587 [66%]One kmer 19026/28587 [66%]One kmer 19027/28587 [66%]One kmer 19028/28587 [66%]One kmer 19029/28587 [66%]One kmer 19030/28587 [66%]One kmer 19031/28587 [66%]One kmer 19032/28587 [66%]One kmer 19033/28587 [66%]One kmer 19034/28587 [66%]One kmer 19035/28587 [66%]One kmer 19036/28587 [66%]One kmer 19037/28587 [66%]One kmer 19038/28587 [66%]

One kmer 20621/28587 [72%]One kmer 20622/28587 [72%]One kmer 20623/28587 [72%]One kmer 20624/28587 [72%]One kmer 20625/28587 [72%]One kmer 20626/28587 [72%]One kmer 20627/28587 [72%]One kmer 20628/28587 [72%]One kmer 20629/28587 [72%]One kmer 20630/28587 [72%]One kmer 20631/28587 [72%]One kmer 20632/28587 [72%]One kmer 20633/28587 [72%]One kmer 20634/28587 [72%]One kmer 20635/28587 [72%]One kmer 20636/28587 [72%]One kmer 20637/28587 [72%]One kmer 20638/28587 [72%]One kmer 20639/28587 [72%]One kmer 20640/28587 [72%]One kmer 20641/28587 [72%]One kmer 20642/28587 [72%]One kmer 20643/28587 [72%]One kmer 20644/28587 [72%]One kmer 20645/28587 [72%]One kmer 20646/28587 [72%]One kmer 20647/28587 [72%]One kmer 20648/28587 [72%]One kmer 20649/28587 [72%]One kmer 20650/28587 [72%]One kmer 20651/28587 [72%]One kmer 20652/28587 [72%]One kmer 20653/28587 [72%]One kmer 20654/28587 [72%]One kmer 20655/28587 [72%]One kmer 20656/28587 [72%]One kmer 20657/28587 [72%]

One kmer 21620/28587 [75%]One kmer 21621/28587 [75%]One kmer 21622/28587 [75%]One kmer 21623/28587 [75%]One kmer 21624/28587 [75%]One kmer 21625/28587 [75%]One kmer 21626/28587 [75%]One kmer 21627/28587 [75%]One kmer 21628/28587 [75%]One kmer 21629/28587 [75%]One kmer 21630/28587 [75%]One kmer 21631/28587 [75%]One kmer 21632/28587 [75%]One kmer 21633/28587 [75%]One kmer 21634/28587 [75%]One kmer 21635/28587 [75%]One kmer 21636/28587 [75%]One kmer 21637/28587 [75%]One kmer 21638/28587 [75%]One kmer 21639/28587 [75%]One kmer 21640/28587 [75%]One kmer 21641/28587 [75%]One kmer 21642/28587 [75%]One kmer 21643/28587 [75%]One kmer 21644/28587 [75%]One kmer 21645/28587 [75%]One kmer 21646/28587 [75%]One kmer 21647/28587 [75%]One kmer 21648/28587 [75%]One kmer 21649/28587 [75%]One kmer 21650/28587 [75%]One kmer 21651/28587 [75%]One kmer 21652/28587 [75%]One kmer 21653/28587 [75%]One kmer 21654/28587 [75%]One kmer 21655/28587 [75%]One kmer 21656/28587 [75%]

One kmer 23007/28587 [80%]One kmer 23008/28587 [80%]One kmer 23009/28587 [80%]One kmer 23010/28587 [80%]One kmer 23011/28587 [80%]One kmer 23012/28587 [80%]One kmer 23013/28587 [80%]One kmer 23014/28587 [80%]One kmer 23015/28587 [80%]One kmer 23016/28587 [80%]One kmer 23017/28587 [80%]One kmer 23018/28587 [80%]One kmer 23019/28587 [80%]One kmer 23020/28587 [80%]One kmer 23021/28587 [80%]One kmer 23022/28587 [80%]One kmer 23023/28587 [80%]One kmer 23024/28587 [80%]One kmer 23025/28587 [80%]One kmer 23026/28587 [80%]One kmer 23027/28587 [80%]One kmer 23028/28587 [80%]One kmer 23029/28587 [80%]One kmer 23030/28587 [80%]One kmer 23031/28587 [80%]One kmer 23032/28587 [80%]One kmer 23033/28587 [80%]One kmer 23034/28587 [80%]One kmer 23035/28587 [80%]One kmer 23036/28587 [80%]One kmer 23037/28587 [80%]One kmer 23038/28587 [80%]One kmer 23039/28587 [80%]One kmer 23040/28587 [80%]One kmer 23041/28587 [80%]One kmer 23042/28587 [80%]One kmer 23043/28587 [80%]

One kmer 24119/28587 [84%]One kmer 24120/28587 [84%]One kmer 24121/28587 [84%]One kmer 24122/28587 [84%]One kmer 24123/28587 [84%]One kmer 24124/28587 [84%]One kmer 24125/28587 [84%]One kmer 24126/28587 [84%]One kmer 24127/28587 [84%]One kmer 24128/28587 [84%]One kmer 24129/28587 [84%]One kmer 24130/28587 [84%]One kmer 24131/28587 [84%]One kmer 24132/28587 [84%]One kmer 24133/28587 [84%]One kmer 24134/28587 [84%]One kmer 24135/28587 [84%]One kmer 24136/28587 [84%]One kmer 24137/28587 [84%]One kmer 24138/28587 [84%]One kmer 24139/28587 [84%]One kmer 24140/28587 [84%]One kmer 24141/28587 [84%]One kmer 24142/28587 [84%]One kmer 24143/28587 [84%]One kmer 24144/28587 [84%]One kmer 24145/28587 [84%]One kmer 24146/28587 [84%]One kmer 24147/28587 [84%]One kmer 24148/28587 [84%]One kmer 24149/28587 [84%]One kmer 24150/28587 [84%]One kmer 24151/28587 [84%]One kmer 24152/28587 [84%]One kmer 24153/28587 [84%]One kmer 24154/28587 [84%]One kmer 24155/28587 [84%]

One kmer 25483/28587 [89%]One kmer 25484/28587 [89%]One kmer 25485/28587 [89%]One kmer 25486/28587 [89%]One kmer 25487/28587 [89%]One kmer 25488/28587 [89%]One kmer 25489/28587 [89%]One kmer 25490/28587 [89%]One kmer 25491/28587 [89%]One kmer 25492/28587 [89%]One kmer 25493/28587 [89%]One kmer 25494/28587 [89%]One kmer 25495/28587 [89%]One kmer 25496/28587 [89%]One kmer 25497/28587 [89%]One kmer 25498/28587 [89%]One kmer 25499/28587 [89%]One kmer 25500/28587 [89%]One kmer 25501/28587 [89%]One kmer 25502/28587 [89%]One kmer 25503/28587 [89%]One kmer 25504/28587 [89%]One kmer 25505/28587 [89%]One kmer 25506/28587 [89%]One kmer 25507/28587 [89%]One kmer 25508/28587 [89%]One kmer 25509/28587 [89%]One kmer 25510/28587 [89%]One kmer 25511/28587 [89%]One kmer 25512/28587 [89%]One kmer 25513/28587 [89%]One kmer 25514/28587 [89%]One kmer 25515/28587 [89%]One kmer 25516/28587 [89%]One kmer 25517/28587 [89%]One kmer 25518/28587 [89%]One kmer 25519/28587 [89%]

One kmer 26618/28587 [93%]One kmer 26619/28587 [93%]One kmer 26620/28587 [93%]One kmer 26621/28587 [93%]One kmer 26622/28587 [93%]One kmer 26623/28587 [93%]One kmer 26624/28587 [93%]One kmer 26625/28587 [93%]One kmer 26626/28587 [93%]One kmer 26627/28587 [93%]One kmer 26628/28587 [93%]One kmer 26629/28587 [93%]One kmer 26630/28587 [93%]One kmer 26631/28587 [93%]One kmer 26632/28587 [93%]One kmer 26633/28587 [93%]One kmer 26634/28587 [93%]One kmer 26635/28587 [93%]One kmer 26636/28587 [93%]One kmer 26637/28587 [93%]One kmer 26638/28587 [93%]One kmer 26639/28587 [93%]One kmer 26640/28587 [93%]One kmer 26641/28587 [93%]One kmer 26642/28587 [93%]One kmer 26643/28587 [93%]One kmer 26644/28587 [93%]One kmer 26645/28587 [93%]One kmer 26646/28587 [93%]One kmer 26647/28587 [93%]One kmer 26648/28587 [93%]One kmer 26649/28587 [93%]One kmer 26650/28587 [93%]One kmer 26651/28587 [93%]One kmer 26652/28587 [93%]One kmer 26653/28587 [93%]One kmer 26654/28587 [93%]

One kmer 28046/28587 [98%]One kmer 28047/28587 [98%]One kmer 28048/28587 [98%]One kmer 28049/28587 [98%]One kmer 28050/28587 [98%]One kmer 28051/28587 [98%]One kmer 28052/28587 [98%]One kmer 28053/28587 [98%]One kmer 28054/28587 [98%]One kmer 28055/28587 [98%]One kmer 28056/28587 [98%]One kmer 28057/28587 [98%]One kmer 28058/28587 [98%]One kmer 28059/28587 [98%]One kmer 28060/28587 [98%]One kmer 28061/28587 [98%]One kmer 28062/28587 [98%]One kmer 28063/28587 [98%]One kmer 28064/28587 [98%]One kmer 28065/28587 [98%]One kmer 28066/28587 [98%]One kmer 28067/28587 [98%]One kmer 28068/28587 [98%]One kmer 28069/28587 [98%]One kmer 28070/28587 [98%]One kmer 28071/28587 [98%]One kmer 28072/28587 [98%]One kmer 28073/28587 [98%]One kmer 28074/28587 [98%]One kmer 28075/28587 [98%]One kmer 28076/28587 [98%]One kmer 28077/28587 [98%]One kmer 28078/28587 [98%]One kmer 28079/28587 [98%]One kmer 28080/28587 [98%]One kmer 28081/28587 [98%]One kmer 28082/28587 [98%]

In [11]:
# Load every y-ion hit into the y MassDawg. The sort key is gen_min_ordering
# applied to the reversed kmer; the DAWG is finalized after the last insert.
ylen = len(y_hits)
y_kmers_in_order = sorted(y_hits, key=lambda seq: gen_min_ordering(seq[::-1]))
for i, kmer in enumerate(y_kmers_in_order):
    print(f'\rOne kmer {i}/{ylen} [{int(100 * (i+1)/ylen)}%]', end='')
    # debugging aid: echo every kmer that ends in 'GYK'
    if kmer.endswith('GYK'):
        print(kmer)
    # charge-1 and charge-2 y spectra of this kmer, generated in that order
    ys, yd = (gen_spectrum(kmer, ion='y', charge=z)['spectrum'] for z in (1, 2))
    y_dog.insert(ys, yd, kmer)

y_dog.finish()

One kmer 0/26679 [0%]One kmer 1/26679 [0%]One kmer 2/26679 [0%]One kmer 3/26679 [0%]One kmer 4/26679 [0%]One kmer 5/26679 [0%]One kmer 6/26679 [0%]One kmer 7/26679 [0%]One kmer 8/26679 [0%]One kmer 9/26679 [0%]One kmer 10/26679 [0%]One kmer 11/26679 [0%]One kmer 12/26679 [0%]One kmer 13/26679 [0%]One kmer 14/26679 [0%]One kmer 15/26679 [0%]One kmer 16/26679 [0%]One kmer 17/26679 [0%]One kmer 18/26679 [0%]One kmer 19/26679 [0%]One kmer 20/26679 [0%]One kmer 21/26679 [0%]One kmer 22/26679 [0%]One kmer 23/26679 [0%]One kmer 24/26679 [0%]One kmer 25/26679 [0%]One kmer 26/26679 [0%]One kmer 27/26679 [0%]One kmer 28/26679 [0%]One kmer 29/26679 [0%]One kmer 30/26679 [0%]One kmer 31/26679 [0%]One kmer 32/26679 [0%]One kmer 33/26679 [0%]One kmer 34/26679 [0%]One kmer 35/26679 [0%]One kmer 36/26679 [0%]One kmer 37/26679 [0%]One kmer 38/26679 [0%]One kmer 39/26679 [0%]One kmer 40/26679 [0%]One kmer 41/26679 [0%]One kmer 42/26679 [0%]One kmer 43/26679 [0

One kmer 1185/26679 [4%]One kmer 1186/26679 [4%]One kmer 1187/26679 [4%]One kmer 1188/26679 [4%]One kmer 1189/26679 [4%]One kmer 1190/26679 [4%]One kmer 1191/26679 [4%]One kmer 1192/26679 [4%]One kmer 1193/26679 [4%]One kmer 1194/26679 [4%]One kmer 1195/26679 [4%]One kmer 1196/26679 [4%]One kmer 1197/26679 [4%]One kmer 1198/26679 [4%]One kmer 1199/26679 [4%]One kmer 1200/26679 [4%]One kmer 1201/26679 [4%]One kmer 1202/26679 [4%]One kmer 1203/26679 [4%]One kmer 1204/26679 [4%]One kmer 1205/26679 [4%]One kmer 1206/26679 [4%]One kmer 1207/26679 [4%]One kmer 1208/26679 [4%]One kmer 1209/26679 [4%]One kmer 1210/26679 [4%]One kmer 1211/26679 [4%]One kmer 1212/26679 [4%]One kmer 1213/26679 [4%]One kmer 1214/26679 [4%]One kmer 1215/26679 [4%]One kmer 1216/26679 [4%]One kmer 1217/26679 [4%]One kmer 1218/26679 [4%]One kmer 1219/26679 [4%]One kmer 1220/26679 [4%]One kmer 1221/26679 [4%]One kmer 1222/26679 [4%]One kmer 1223/26679 [4%]One kmer 1224/26679 [4%]

One kmer 2473/26679 [9%]One kmer 2474/26679 [9%]One kmer 2475/26679 [9%]One kmer 2476/26679 [9%]One kmer 2477/26679 [9%]One kmer 2478/26679 [9%]One kmer 2479/26679 [9%]One kmer 2480/26679 [9%]One kmer 2481/26679 [9%]One kmer 2482/26679 [9%]One kmer 2483/26679 [9%]One kmer 2484/26679 [9%]One kmer 2485/26679 [9%]One kmer 2486/26679 [9%]One kmer 2487/26679 [9%]One kmer 2488/26679 [9%]One kmer 2489/26679 [9%]One kmer 2490/26679 [9%]One kmer 2491/26679 [9%]One kmer 2492/26679 [9%]One kmer 2493/26679 [9%]One kmer 2494/26679 [9%]One kmer 2495/26679 [9%]One kmer 2496/26679 [9%]One kmer 2497/26679 [9%]One kmer 2498/26679 [9%]One kmer 2499/26679 [9%]One kmer 2500/26679 [9%]One kmer 2501/26679 [9%]One kmer 2502/26679 [9%]One kmer 2503/26679 [9%]One kmer 2504/26679 [9%]One kmer 2505/26679 [9%]One kmer 2506/26679 [9%]One kmer 2507/26679 [9%]One kmer 2508/26679 [9%]One kmer 2509/26679 [9%]One kmer 2510/26679 [9%]One kmer 2511/26679 [9%]One kmer 2512/26679 [9%]

One kmer 3523/26679 [13%]One kmer 3524/26679 [13%]One kmer 3525/26679 [13%]One kmer 3526/26679 [13%]One kmer 3527/26679 [13%]One kmer 3528/26679 [13%]One kmer 3529/26679 [13%]One kmer 3530/26679 [13%]One kmer 3531/26679 [13%]One kmer 3532/26679 [13%]One kmer 3533/26679 [13%]One kmer 3534/26679 [13%]One kmer 3535/26679 [13%]One kmer 3536/26679 [13%]One kmer 3537/26679 [13%]One kmer 3538/26679 [13%]One kmer 3539/26679 [13%]One kmer 3540/26679 [13%]One kmer 3541/26679 [13%]One kmer 3542/26679 [13%]One kmer 3543/26679 [13%]One kmer 3544/26679 [13%]One kmer 3545/26679 [13%]One kmer 3546/26679 [13%]One kmer 3547/26679 [13%]One kmer 3548/26679 [13%]One kmer 3549/26679 [13%]One kmer 3550/26679 [13%]One kmer 3551/26679 [13%]One kmer 3552/26679 [13%]One kmer 3553/26679 [13%]One kmer 3554/26679 [13%]One kmer 3555/26679 [13%]One kmer 3556/26679 [13%]One kmer 3557/26679 [13%]One kmer 3558/26679 [13%]One kmer 3559/26679 [13%]One kmer 3560/26679 [13%]One kmer 35

One kmer 4812/26679 [18%]One kmer 4813/26679 [18%]One kmer 4814/26679 [18%]One kmer 4815/26679 [18%]One kmer 4816/26679 [18%]One kmer 4817/26679 [18%]One kmer 4818/26679 [18%]One kmer 4819/26679 [18%]One kmer 4820/26679 [18%]One kmer 4821/26679 [18%]One kmer 4822/26679 [18%]One kmer 4823/26679 [18%]One kmer 4824/26679 [18%]One kmer 4825/26679 [18%]One kmer 4826/26679 [18%]One kmer 4827/26679 [18%]One kmer 4828/26679 [18%]One kmer 4829/26679 [18%]One kmer 4830/26679 [18%]One kmer 4831/26679 [18%]One kmer 4832/26679 [18%]One kmer 4833/26679 [18%]One kmer 4834/26679 [18%]One kmer 4835/26679 [18%]One kmer 4836/26679 [18%]One kmer 4837/26679 [18%]One kmer 4838/26679 [18%]One kmer 4839/26679 [18%]One kmer 4840/26679 [18%]One kmer 4841/26679 [18%]One kmer 4842/26679 [18%]One kmer 4843/26679 [18%]One kmer 4844/26679 [18%]One kmer 4845/26679 [18%]One kmer 4846/26679 [18%]One kmer 4847/26679 [18%]One kmer 4848/26679 [18%]One kmer 4849/26679 [18%]One kmer 48

One kmer 5907/26679 [22%]One kmer 5908/26679 [22%]One kmer 5909/26679 [22%]One kmer 5910/26679 [22%]One kmer 5911/26679 [22%]One kmer 5912/26679 [22%]One kmer 5913/26679 [22%]One kmer 5914/26679 [22%]One kmer 5915/26679 [22%]One kmer 5916/26679 [22%]One kmer 5917/26679 [22%]One kmer 5918/26679 [22%]One kmer 5919/26679 [22%]One kmer 5920/26679 [22%]One kmer 5921/26679 [22%]One kmer 5922/26679 [22%]One kmer 5923/26679 [22%]One kmer 5924/26679 [22%]One kmer 5925/26679 [22%]One kmer 5926/26679 [22%]One kmer 5927/26679 [22%]One kmer 5928/26679 [22%]One kmer 5929/26679 [22%]One kmer 5930/26679 [22%]One kmer 5931/26679 [22%]One kmer 5932/26679 [22%]One kmer 5933/26679 [22%]One kmer 5934/26679 [22%]One kmer 5935/26679 [22%]One kmer 5936/26679 [22%]One kmer 5937/26679 [22%]One kmer 5938/26679 [22%]One kmer 5939/26679 [22%]One kmer 5940/26679 [22%]One kmer 5941/26679 [22%]One kmer 5942/26679 [22%]One kmer 5943/26679 [22%]One kmer 5944/26679 [22%]One kmer 59

One kmer 7092/26679 [26%]One kmer 7093/26679 [26%]One kmer 7094/26679 [26%]One kmer 7095/26679 [26%]One kmer 7096/26679 [26%]One kmer 7097/26679 [26%]One kmer 7098/26679 [26%]One kmer 7099/26679 [26%]One kmer 7100/26679 [26%]One kmer 7101/26679 [26%]One kmer 7102/26679 [26%]One kmer 7103/26679 [26%]One kmer 7104/26679 [26%]One kmer 7105/26679 [26%]One kmer 7106/26679 [26%]One kmer 7107/26679 [26%]One kmer 7108/26679 [26%]One kmer 7109/26679 [26%]One kmer 7110/26679 [26%]One kmer 7111/26679 [26%]One kmer 7112/26679 [26%]One kmer 7113/26679 [26%]One kmer 7114/26679 [26%]One kmer 7115/26679 [26%]One kmer 7116/26679 [26%]One kmer 7117/26679 [26%]One kmer 7118/26679 [26%]One kmer 7119/26679 [26%]One kmer 7120/26679 [26%]One kmer 7121/26679 [26%]One kmer 7122/26679 [26%]One kmer 7123/26679 [26%]One kmer 7124/26679 [26%]One kmer 7125/26679 [26%]One kmer 7126/26679 [26%]One kmer 7127/26679 [26%]One kmer 7128/26679 [26%]One kmer 7129/26679 [26%]One kmer 71

One kmer 8051/26679 [30%]One kmer 8052/26679 [30%]One kmer 8053/26679 [30%]One kmer 8054/26679 [30%]One kmer 8055/26679 [30%]One kmer 8056/26679 [30%]One kmer 8057/26679 [30%]One kmer 8058/26679 [30%]One kmer 8059/26679 [30%]One kmer 8060/26679 [30%]One kmer 8061/26679 [30%]One kmer 8062/26679 [30%]One kmer 8063/26679 [30%]One kmer 8064/26679 [30%]One kmer 8065/26679 [30%]One kmer 8066/26679 [30%]One kmer 8067/26679 [30%]One kmer 8068/26679 [30%]One kmer 8069/26679 [30%]One kmer 8070/26679 [30%]One kmer 8071/26679 [30%]One kmer 8072/26679 [30%]One kmer 8073/26679 [30%]One kmer 8074/26679 [30%]One kmer 8075/26679 [30%]One kmer 8076/26679 [30%]One kmer 8077/26679 [30%]One kmer 8078/26679 [30%]One kmer 8079/26679 [30%]One kmer 8080/26679 [30%]One kmer 8081/26679 [30%]One kmer 8082/26679 [30%]One kmer 8083/26679 [30%]One kmer 8084/26679 [30%]One kmer 8085/26679 [30%]One kmer 8086/26679 [30%]One kmer 8087/26679 [30%]One kmer 8088/26679 [30%]One kmer 80

One kmer 9026/26679 [33%]One kmer 9027/26679 [33%]One kmer 9028/26679 [33%]One kmer 9029/26679 [33%]One kmer 9030/26679 [33%]One kmer 9031/26679 [33%]One kmer 9032/26679 [33%]One kmer 9033/26679 [33%]One kmer 9034/26679 [33%]One kmer 9035/26679 [33%]One kmer 9036/26679 [33%]One kmer 9037/26679 [33%]One kmer 9038/26679 [33%]One kmer 9039/26679 [33%]One kmer 9040/26679 [33%]One kmer 9041/26679 [33%]One kmer 9042/26679 [33%]One kmer 9043/26679 [33%]One kmer 9044/26679 [33%]One kmer 9045/26679 [33%]One kmer 9046/26679 [33%]One kmer 9047/26679 [33%]One kmer 9048/26679 [33%]One kmer 9049/26679 [33%]One kmer 9050/26679 [33%]One kmer 9051/26679 [33%]One kmer 9052/26679 [33%]One kmer 9053/26679 [33%]One kmer 9054/26679 [33%]One kmer 9055/26679 [33%]One kmer 9056/26679 [33%]One kmer 9057/26679 [33%]One kmer 9058/26679 [33%]One kmer 9059/26679 [33%]One kmer 9060/26679 [33%]One kmer 9061/26679 [33%]One kmer 9062/26679 [33%]One kmer 9063/26679 [33%]One kmer 90

One kmer 10520/26679 [39%]One kmer 10521/26679 [39%]One kmer 10522/26679 [39%]One kmer 10523/26679 [39%]One kmer 10524/26679 [39%]One kmer 10525/26679 [39%]One kmer 10526/26679 [39%]One kmer 10527/26679 [39%]One kmer 10528/26679 [39%]One kmer 10529/26679 [39%]One kmer 10530/26679 [39%]One kmer 10531/26679 [39%]One kmer 10532/26679 [39%]One kmer 10533/26679 [39%]One kmer 10534/26679 [39%]One kmer 10535/26679 [39%]One kmer 10536/26679 [39%]One kmer 10537/26679 [39%]One kmer 10538/26679 [39%]One kmer 10539/26679 [39%]One kmer 10540/26679 [39%]One kmer 10541/26679 [39%]One kmer 10542/26679 [39%]One kmer 10543/26679 [39%]One kmer 10544/26679 [39%]One kmer 10545/26679 [39%]One kmer 10546/26679 [39%]One kmer 10547/26679 [39%]One kmer 10548/26679 [39%]One kmer 10549/26679 [39%]One kmer 10550/26679 [39%]One kmer 10551/26679 [39%]One kmer 10552/26679 [39%]One kmer 10553/26679 [39%]One kmer 10554/26679 [39%]One kmer 10555/26679 [39%]One kmer 10556/26679 [39%]

One kmer 12020/26679 [45%]One kmer 12021/26679 [45%]One kmer 12022/26679 [45%]One kmer 12023/26679 [45%]One kmer 12024/26679 [45%]One kmer 12025/26679 [45%]One kmer 12026/26679 [45%]One kmer 12027/26679 [45%]One kmer 12028/26679 [45%]One kmer 12029/26679 [45%]One kmer 12030/26679 [45%]One kmer 12031/26679 [45%]One kmer 12032/26679 [45%]One kmer 12033/26679 [45%]One kmer 12034/26679 [45%]One kmer 12035/26679 [45%]One kmer 12036/26679 [45%]One kmer 12037/26679 [45%]One kmer 12038/26679 [45%]One kmer 12039/26679 [45%]One kmer 12040/26679 [45%]One kmer 12041/26679 [45%]One kmer 12042/26679 [45%]One kmer 12043/26679 [45%]One kmer 12044/26679 [45%]One kmer 12045/26679 [45%]One kmer 12046/26679 [45%]One kmer 12047/26679 [45%]One kmer 12048/26679 [45%]One kmer 12049/26679 [45%]One kmer 12050/26679 [45%]One kmer 12051/26679 [45%]One kmer 12052/26679 [45%]One kmer 12053/26679 [45%]One kmer 12054/26679 [45%]One kmer 12055/26679 [45%]One kmer 12056/26679 [45%]

One kmer 13247/26679 [49%]One kmer 13248/26679 [49%]One kmer 13249/26679 [49%]One kmer 13250/26679 [49%]One kmer 13251/26679 [49%]One kmer 13252/26679 [49%]One kmer 13253/26679 [49%]One kmer 13254/26679 [49%]One kmer 13255/26679 [49%]One kmer 13256/26679 [49%]One kmer 13257/26679 [49%]One kmer 13258/26679 [49%]One kmer 13259/26679 [49%]One kmer 13260/26679 [49%]One kmer 13261/26679 [49%]One kmer 13262/26679 [49%]One kmer 13263/26679 [49%]One kmer 13264/26679 [49%]One kmer 13265/26679 [49%]One kmer 13266/26679 [49%]One kmer 13267/26679 [49%]One kmer 13268/26679 [49%]One kmer 13269/26679 [49%]One kmer 13270/26679 [49%]One kmer 13271/26679 [49%]One kmer 13272/26679 [49%]One kmer 13273/26679 [49%]One kmer 13274/26679 [49%]One kmer 13275/26679 [49%]One kmer 13276/26679 [49%]One kmer 13277/26679 [49%]One kmer 13278/26679 [49%]One kmer 13279/26679 [49%]One kmer 13280/26679 [49%]One kmer 13281/26679 [49%]One kmer 13282/26679 [49%]One kmer 13283/26679 [49%]

One kmer 14454/26679 [54%]One kmer 14455/26679 [54%]One kmer 14456/26679 [54%]One kmer 14457/26679 [54%]One kmer 14458/26679 [54%]One kmer 14459/26679 [54%]One kmer 14460/26679 [54%]One kmer 14461/26679 [54%]One kmer 14462/26679 [54%]One kmer 14463/26679 [54%]One kmer 14464/26679 [54%]One kmer 14465/26679 [54%]One kmer 14466/26679 [54%]One kmer 14467/26679 [54%]One kmer 14468/26679 [54%]One kmer 14469/26679 [54%]One kmer 14470/26679 [54%]One kmer 14471/26679 [54%]One kmer 14472/26679 [54%]One kmer 14473/26679 [54%]One kmer 14474/26679 [54%]One kmer 14475/26679 [54%]One kmer 14476/26679 [54%]One kmer 14477/26679 [54%]One kmer 14478/26679 [54%]One kmer 14479/26679 [54%]One kmer 14480/26679 [54%]One kmer 14481/26679 [54%]One kmer 14482/26679 [54%]One kmer 14483/26679 [54%]One kmer 14484/26679 [54%]One kmer 14485/26679 [54%]One kmer 14486/26679 [54%]One kmer 14487/26679 [54%]One kmer 14488/26679 [54%]One kmer 14489/26679 [54%]One kmer 14490/26679 [54%]

One kmer 15518/26679 [58%]One kmer 15519/26679 [58%]One kmer 15520/26679 [58%]One kmer 15521/26679 [58%]One kmer 15522/26679 [58%]One kmer 15523/26679 [58%]One kmer 15524/26679 [58%]One kmer 15525/26679 [58%]One kmer 15526/26679 [58%]One kmer 15527/26679 [58%]One kmer 15528/26679 [58%]One kmer 15529/26679 [58%]One kmer 15530/26679 [58%]One kmer 15531/26679 [58%]One kmer 15532/26679 [58%]One kmer 15533/26679 [58%]One kmer 15534/26679 [58%]One kmer 15535/26679 [58%]One kmer 15536/26679 [58%]One kmer 15537/26679 [58%]One kmer 15538/26679 [58%]One kmer 15539/26679 [58%]One kmer 15540/26679 [58%]One kmer 15541/26679 [58%]One kmer 15542/26679 [58%]One kmer 15543/26679 [58%]One kmer 15544/26679 [58%]One kmer 15545/26679 [58%]One kmer 15546/26679 [58%]One kmer 15547/26679 [58%]One kmer 15548/26679 [58%]One kmer 15549/26679 [58%]One kmer 15550/26679 [58%]One kmer 15551/26679 [58%]One kmer 15552/26679 [58%]One kmer 15553/26679 [58%]One kmer 15554/26679 [58%]

One kmer 16648/26679 [62%]One kmer 16649/26679 [62%]One kmer 16650/26679 [62%]One kmer 16651/26679 [62%]One kmer 16652/26679 [62%]One kmer 16653/26679 [62%]One kmer 16654/26679 [62%]One kmer 16655/26679 [62%]One kmer 16656/26679 [62%]One kmer 16657/26679 [62%]One kmer 16658/26679 [62%]One kmer 16659/26679 [62%]One kmer 16660/26679 [62%]One kmer 16661/26679 [62%]One kmer 16662/26679 [62%]One kmer 16663/26679 [62%]One kmer 16664/26679 [62%]One kmer 16665/26679 [62%]One kmer 16666/26679 [62%]One kmer 16667/26679 [62%]One kmer 16668/26679 [62%]One kmer 16669/26679 [62%]One kmer 16670/26679 [62%]One kmer 16671/26679 [62%]One kmer 16672/26679 [62%]One kmer 16673/26679 [62%]One kmer 16674/26679 [62%]One kmer 16675/26679 [62%]One kmer 16676/26679 [62%]One kmer 16677/26679 [62%]One kmer 16678/26679 [62%]One kmer 16679/26679 [62%]One kmer 16680/26679 [62%]One kmer 16681/26679 [62%]One kmer 16682/26679 [62%]One kmer 16683/26679 [62%]One kmer 16684/26679 [62%]

One kmer 17517/26679 [65%]One kmer 17518/26679 [65%]One kmer 17519/26679 [65%]One kmer 17520/26679 [65%]One kmer 17521/26679 [65%]One kmer 17522/26679 [65%]One kmer 17523/26679 [65%]One kmer 17524/26679 [65%]One kmer 17525/26679 [65%]One kmer 17526/26679 [65%]One kmer 17527/26679 [65%]One kmer 17528/26679 [65%]One kmer 17529/26679 [65%]One kmer 17530/26679 [65%]One kmer 17531/26679 [65%]One kmer 17532/26679 [65%]One kmer 17533/26679 [65%]One kmer 17534/26679 [65%]One kmer 17535/26679 [65%]One kmer 17536/26679 [65%]One kmer 17537/26679 [65%]One kmer 17538/26679 [65%]One kmer 17539/26679 [65%]One kmer 17540/26679 [65%]One kmer 17541/26679 [65%]One kmer 17542/26679 [65%]One kmer 17543/26679 [65%]One kmer 17544/26679 [65%]One kmer 17545/26679 [65%]One kmer 17546/26679 [65%]One kmer 17547/26679 [65%]One kmer 17548/26679 [65%]One kmer 17549/26679 [65%]One kmer 17550/26679 [65%]One kmer 17551/26679 [65%]One kmer 17552/26679 [65%]One kmer 17553/26679 [65%]

One kmer 18396/26679 [68%]One kmer 18397/26679 [68%]One kmer 18398/26679 [68%]One kmer 18399/26679 [68%]One kmer 18400/26679 [68%]One kmer 18401/26679 [68%]One kmer 18402/26679 [68%]One kmer 18403/26679 [68%]One kmer 18404/26679 [68%]One kmer 18405/26679 [68%]One kmer 18406/26679 [68%]One kmer 18407/26679 [68%]One kmer 18408/26679 [69%]One kmer 18409/26679 [69%]One kmer 18410/26679 [69%]One kmer 18411/26679 [69%]One kmer 18412/26679 [69%]One kmer 18413/26679 [69%]One kmer 18414/26679 [69%]One kmer 18415/26679 [69%]One kmer 18416/26679 [69%]One kmer 18417/26679 [69%]One kmer 18418/26679 [69%]One kmer 18419/26679 [69%]One kmer 18420/26679 [69%]One kmer 18421/26679 [69%]One kmer 18422/26679 [69%]One kmer 18423/26679 [69%]One kmer 18424/26679 [69%]One kmer 18425/26679 [69%]One kmer 18426/26679 [69%]One kmer 18427/26679 [69%]One kmer 18428/26679 [69%]One kmer 18429/26679 [69%]One kmer 18430/26679 [69%]One kmer 18431/26679 [69%]One kmer 18432/26679 [69%]

One kmer 19352/26679 [72%]One kmer 19353/26679 [72%]One kmer 19354/26679 [72%]One kmer 19355/26679 [72%]One kmer 19356/26679 [72%]One kmer 19357/26679 [72%]One kmer 19358/26679 [72%]One kmer 19359/26679 [72%]One kmer 19360/26679 [72%]One kmer 19361/26679 [72%]One kmer 19362/26679 [72%]One kmer 19363/26679 [72%]One kmer 19364/26679 [72%]One kmer 19365/26679 [72%]One kmer 19366/26679 [72%]One kmer 19367/26679 [72%]One kmer 19368/26679 [72%]One kmer 19369/26679 [72%]One kmer 19370/26679 [72%]One kmer 19371/26679 [72%]One kmer 19372/26679 [72%]One kmer 19373/26679 [72%]One kmer 19374/26679 [72%]One kmer 19375/26679 [72%]One kmer 19376/26679 [72%]One kmer 19377/26679 [72%]One kmer 19378/26679 [72%]One kmer 19379/26679 [72%]One kmer 19380/26679 [72%]One kmer 19381/26679 [72%]One kmer 19382/26679 [72%]One kmer 19383/26679 [72%]One kmer 19384/26679 [72%]One kmer 19385/26679 [72%]One kmer 19386/26679 [72%]One kmer 19387/26679 [72%]One kmer 19388/26679 [72%]

One kmer 20049/26679 [75%]One kmer 20050/26679 [75%]One kmer 20051/26679 [75%]One kmer 20052/26679 [75%]One kmer 20053/26679 [75%]One kmer 20054/26679 [75%]One kmer 20055/26679 [75%]One kmer 20056/26679 [75%]One kmer 20057/26679 [75%]One kmer 20058/26679 [75%]One kmer 20059/26679 [75%]One kmer 20060/26679 [75%]One kmer 20061/26679 [75%]One kmer 20062/26679 [75%]One kmer 20063/26679 [75%]One kmer 20064/26679 [75%]One kmer 20065/26679 [75%]One kmer 20066/26679 [75%]One kmer 20067/26679 [75%]One kmer 20068/26679 [75%]One kmer 20069/26679 [75%]One kmer 20070/26679 [75%]One kmer 20071/26679 [75%]One kmer 20072/26679 [75%]One kmer 20073/26679 [75%]One kmer 20074/26679 [75%]One kmer 20075/26679 [75%]One kmer 20076/26679 [75%]One kmer 20077/26679 [75%]One kmer 20078/26679 [75%]One kmer 20079/26679 [75%]One kmer 20080/26679 [75%]One kmer 20081/26679 [75%]One kmer 20082/26679 [75%]One kmer 20083/26679 [75%]One kmer 20084/26679 [75%]One kmer 20085/26679 [75%]

One kmer 21124/26679 [79%]One kmer 21125/26679 [79%]One kmer 21126/26679 [79%]One kmer 21127/26679 [79%]One kmer 21128/26679 [79%]One kmer 21129/26679 [79%]One kmer 21130/26679 [79%]One kmer 21131/26679 [79%]One kmer 21132/26679 [79%]One kmer 21133/26679 [79%]One kmer 21134/26679 [79%]One kmer 21135/26679 [79%]One kmer 21136/26679 [79%]One kmer 21137/26679 [79%]One kmer 21138/26679 [79%]One kmer 21139/26679 [79%]One kmer 21140/26679 [79%]One kmer 21141/26679 [79%]One kmer 21142/26679 [79%]One kmer 21143/26679 [79%]One kmer 21144/26679 [79%]One kmer 21145/26679 [79%]One kmer 21146/26679 [79%]One kmer 21147/26679 [79%]One kmer 21148/26679 [79%]One kmer 21149/26679 [79%]One kmer 21150/26679 [79%]One kmer 21151/26679 [79%]One kmer 21152/26679 [79%]One kmer 21153/26679 [79%]One kmer 21154/26679 [79%]One kmer 21155/26679 [79%]One kmer 21156/26679 [79%]One kmer 21157/26679 [79%]One kmer 21158/26679 [79%]One kmer 21159/26679 [79%]One kmer 21160/26679 [79%]

One kmer 21509/26679 [80%]One kmer 21510/26679 [80%]One kmer 21511/26679 [80%]One kmer 21512/26679 [80%]One kmer 21513/26679 [80%]One kmer 21514/26679 [80%]One kmer 21515/26679 [80%]One kmer 21516/26679 [80%]One kmer 21517/26679 [80%]One kmer 21518/26679 [80%]One kmer 21519/26679 [80%]One kmer 21520/26679 [80%]One kmer 21521/26679 [80%]One kmer 21522/26679 [80%]One kmer 21523/26679 [80%]One kmer 21524/26679 [80%]One kmer 21525/26679 [80%]One kmer 21526/26679 [80%]One kmer 21527/26679 [80%]One kmer 21528/26679 [80%]One kmer 21529/26679 [80%]One kmer 21530/26679 [80%]One kmer 21531/26679 [80%]One kmer 21532/26679 [80%]One kmer 21533/26679 [80%]One kmer 21534/26679 [80%]One kmer 21535/26679 [80%]One kmer 21536/26679 [80%]One kmer 21537/26679 [80%]One kmer 21538/26679 [80%]One kmer 21539/26679 [80%]One kmer 21540/26679 [80%]One kmer 21541/26679 [80%]One kmer 21542/26679 [80%]One kmer 21543/26679 [80%]One kmer 21544/26679 [80%]One kmer 21545/26679 [80%]

One kmer 22042/26679 [82%]One kmer 22043/26679 [82%]One kmer 22044/26679 [82%]One kmer 22045/26679 [82%]One kmer 22046/26679 [82%]One kmer 22047/26679 [82%]One kmer 22048/26679 [82%]One kmer 22049/26679 [82%]One kmer 22050/26679 [82%]One kmer 22051/26679 [82%]One kmer 22052/26679 [82%]One kmer 22053/26679 [82%]One kmer 22054/26679 [82%]One kmer 22055/26679 [82%]One kmer 22056/26679 [82%]One kmer 22057/26679 [82%]One kmer 22058/26679 [82%]One kmer 22059/26679 [82%]One kmer 22060/26679 [82%]One kmer 22061/26679 [82%]One kmer 22062/26679 [82%]One kmer 22063/26679 [82%]One kmer 22064/26679 [82%]One kmer 22065/26679 [82%]One kmer 22066/26679 [82%]One kmer 22067/26679 [82%]One kmer 22068/26679 [82%]One kmer 22069/26679 [82%]One kmer 22070/26679 [82%]One kmer 22071/26679 [82%]One kmer 22072/26679 [82%]One kmer 22073/26679 [82%]One kmer 22074/26679 [82%]One kmer 22075/26679 [82%]One kmer 22076/26679 [82%]One kmer 22077/26679 [82%]One kmer 22078/26679 [82%]

One kmer 22508/26679 [84%]One kmer 22509/26679 [84%]One kmer 22510/26679 [84%]One kmer 22511/26679 [84%]One kmer 22512/26679 [84%]One kmer 22513/26679 [84%]One kmer 22514/26679 [84%]One kmer 22515/26679 [84%]One kmer 22516/26679 [84%]One kmer 22517/26679 [84%]One kmer 22518/26679 [84%]One kmer 22519/26679 [84%]One kmer 22520/26679 [84%]One kmer 22521/26679 [84%]One kmer 22522/26679 [84%]One kmer 22523/26679 [84%]One kmer 22524/26679 [84%]One kmer 22525/26679 [84%]One kmer 22526/26679 [84%]One kmer 22527/26679 [84%]One kmer 22528/26679 [84%]One kmer 22529/26679 [84%]One kmer 22530/26679 [84%]One kmer 22531/26679 [84%]One kmer 22532/26679 [84%]One kmer 22533/26679 [84%]One kmer 22534/26679 [84%]One kmer 22535/26679 [84%]One kmer 22536/26679 [84%]One kmer 22537/26679 [84%]One kmer 22538/26679 [84%]One kmer 22539/26679 [84%]One kmer 22540/26679 [84%]One kmer 22541/26679 [84%]One kmer 22542/26679 [84%]One kmer 22543/26679 [84%]One kmer 22544/26679 [84%]

One kmer 26678/26679 [100%]

In [12]:
# Bundle the fasta path, the proteins, the tree and the two kmer structures into
# one Database object; later cells read `db.b_dawg` and `db.y_dawg` from it.
# NOTE(review): `b_dog` / `y_dog` are presumably the b- and y-ion MassDawgs
# built in step 6 -- confirm against the cell that creates them.
db = Database(fasta_file, prots, t, b_dog, y_dog)

## 7. For each spectrum, search the graphs and build an alignment

In [13]:
def _top_scoring_kmers(dawg, observed, score_index, gap, ppm_tolerance):
    """Return the kmers found in *dawg* that tie for the best score.

    Args:
        dawg: object exposing ``fuzzy_search(observed, gap, ppm_tolerance)``.
        observed: the observed spectrum passed to the search and the scorer.
        score_index: which element of ``scoring.score_subsequence``'s result
            to rank by (this notebook uses 0 for b hits and 1 for y hits).
        gap: second positional argument forwarded to ``fuzzy_search``.
        ppm_tolerance: tolerance forwarded to both the search and the scorer.

    Returns:
        The kmers whose score equals the maximum score, in the order the
        search produced them; an empty list when the search finds nothing.
    """
    kmers = list(dawg.fuzzy_search(observed, gap, ppm_tolerance))
    if not kmers:
        # Nothing matched: indexing the "best" hit would raise IndexError.
        return []

    scores = [
        scoring.score_subsequence(observed, kmer, ppm_tolerance)[score_index]
        for kmer in kmers
    ]
    best = max(scores)
    # Keeping search order for ties matches what a stable descending sort
    # followed by an equality filter would return.
    return [kmer for kmer, score in zip(kmers, scores) if score == best]


# Maps the spectrum's index in `spectra` to whatever attempt_alignment returns
# (an empty list when neither search produced a candidate).
alignments = {}
gap = 2
total_spectra = len(spectra)
for spec_c, spectrum in enumerate(spectra):
    print(f'On spectrum {spec_c + 1}/{total_spectra} [{int(100 * (spec_c+1)/total_spectra)}%]')

    filtered_b_results = _top_scoring_kmers(db.b_dawg, spectrum.spectrum, 0, gap, ppm_tol)
    print(f'\nB results for sequence {spec_c}:\n{filtered_b_results}')

    filtered_y_results = _top_scoring_kmers(db.y_dawg, spectrum.spectrum, 1, gap, ppm_tol)
    print(f'\nY results for sequence {spec_c}:\n{filtered_y_results}')

    if not filtered_b_results and not filtered_y_results:
        # No candidates from either side, so there is nothing to align.
        # An empty list keeps the reporting loop downstream working.
        alignments[spec_c] = []
        continue

    # NOTE(review): when only one of the two lists is empty the call is still
    # made; confirm attempt_alignment copes with a single empty side.
    alignments[spec_c] = alignment.attempt_alignment(
        spectrum,
        db,
        filtered_b_results,
        filtered_y_results,
        ppm_tolerance=ppm_tol,
        n=3
    )

On spectrum 1/9 [11%]

B results for sequence 0:
['LDDI', 'DLDI', 'PVEGAF', 'EVEV', 'DLDL', 'DIVE', 'DIDI', 'EVVE', 'VEDI', 'IDDI', 'VEDL', 'LDEV', 'VEVE', 'LDID', 'LDDL', 'DLVE', 'DIID', 'LDLD', 'EVLD', 'IDEV', 'IDLD', 'VELD', 'LDVE', 'VEEV', 'DIDL', 'DLEV', 'DILD', 'DLID', 'IDDL', 'KCPQ', 'EVDI']

Y results for sequence 0:
['QWLS', 'DFKG', 'DFGK']
On spectrum 2/9 [22%]

B results for sequence 1:
['SSSY', 'SSPV', 'EGAI', 'EGAD', 'DAAL', 'GEAD', 'DAAD', 'DAAI', 'SSAA', 'ADAD', 'SATG', 'SSYS', 'SASA', 'GEAL', 'ADAL', 'SSVP', 'ADAI', 'SAAS', 'EGAL', 'GEAI', 'SAGT', 'SAPVPVV']

Y results for sequence 1:
['AIVGYK']
On spectrum 3/9 [33%]

B results for sequence 2:
['SLERE', 'TVLS', 'TVEER', 'SSLI', 'SSQPKS', 'SLDKNG', 'SETD', 'SLSL', 'SLEER', 'SILS', 'SSIL', 'SEGGKR', 'TTDD', 'SLTV', 'SINGSR', 'TDTD', 'SLLS', 'TDSE', 'SESE', 'SLIS', 'SSLL', 'TDES', 'SERLE', 'SSEE', 'TVDLW', 'TVVT', 'SIVSNN', 'TDDT', 'TVSI', 'SEDT', 'SEERI', 'SIIS', 'SLSQQA', 'SLFPF', 'SSII', 'SLVT', 'SSGAQAL', 'TVTV', 'SEES

In [14]:
# Report every alignment kept for each spectrum: the sequence, its b and y
# scores and its precursor distance.
print('Sequence \t | \t scores \t | \t precursor distance')
# The loop variable is deliberately NOT called `alignment`: that name is the
# imported module used by the alignment cell, and rebinding it here would break
# `alignment.attempt_alignment` if that cell were run again afterwards.
for i, spectrum_alignments in alignments.items():
    print(f'Alignments for sequence {i}')
    for a in spectrum_alignments:
        print(f'{a.sequence} \t | \t b: {a.b_score}, y: {a.y_score} \t | \t {a.precursor_distance}')

Sequence 	 | 	 scores 	 | 	 precursor distance
Alignments for sequence 0
LDDLGNQWLS 	 | 	 b: 2, y: 2 	 | 	 0.9874947499999962
DIIDFGKHKS 	 | 	 b: 2, y: 0 	 | 	 0.5136797500001649
IDLDFGKHKS 	 | 	 b: 2, y: 0 	 | 	 0.5136797500001649
Alignments for sequence 1
DAAIVGYK 	 | 	 b: 3, y: 6 	 | 	 0.0012217500000701875
GEALVGYK 	 | 	 b: 3, y: 6 	 | 	 0.0012217500000701875
ADALVGYK 	 | 	 b: 3, y: 6 	 | 	 0.0012217500000701875
Alignments for sequence 2
Alignments for sequence 3
DPQVEQLEL 	 | 	 b: 6, y: 2 	 | 	 0.000672749999921507
DPQVEQLEN 	 | 	 b: 6, y: 0 	 | 	 0.4787587500001109
PQVEQLENN 	 | 	 b: 1, y: 0 	 | 	 0.013249249999944368
Alignments for sequence 4
DLQTLALWSRM 	 | 	 b: 4, y: 8 	 | 	 0.0001922500000546279
DLQTLAWSRMD 	 | 	 b: 5, y: 0 	 | 	 0.9716317500000287
Alignments for sequence 5
SLEEREHMDVPSV 	 | 	 b: 3, y: 3 	 | 	 0.02964724999981172
DPVLTVEEMDVPSV 	 | 	 b: 1, y: 3 	 | 	 0.985137750000149
KDLTEYMMDVPSV 	 | 	 b: 1, y: 3 	 | 	 0.03157074999978704
Alignments for sequence 6
ELTNIELL 

In [15]:
# Show the observed peaks of spectra[1], then the sorted y-ion spectrum that
# gen_spectrum produces for 'VGYK' at charge 1 and at charge 2.
observed_peaks = spectra[1].spectrum
print(observed_peaks)
for charge_state in (1, 2):
    generated = gen_spectrum('VGYK', ion='y', charge=charge_state)
    print(sorted(generated['spectrum']))

[72.08074188232422, 86.0962142944336, 88.0391616821289, 99.05437469482422, 120.08078002929688, 129.10186767578125, 136.074462890625, 141.06512451171875, 142.05029296875, 147.11251831054688, 157.13116455078125, 159.0762939453125, 187.0716094970703, 213.0857391357422, 258.1067810058594, 270.8402099609375, 310.1756591796875, 367.1947326660156, 371.1916809082031, 406.25396728515625, 466.2637634277344, 467.26953125, 579.3480834960938, 650.3857421875, 651.3853149414062]
[147.112804135, 310.176124135, 367.197588135, 466.266002135]
[74.060040285, 155.591700285, 184.102432285, 233.636639285]


In [16]:
# Convert the notebook-wide ppm tolerance to daltons at mass 310.176124135
# (one of the charge-1 y-ion masses of 'VGYK' printed by the previous cell).
# Uses `ppm_tol` rather than a second hard-coded 20 so the tolerance is
# defined in one place.
da_tol = ppm_to_da(310.176124135, ppm_tol)

In [17]:
# Query the y structure for spectra[1] with 0 as the second argument (the
# alignment cell passes `gap` there) and 20 as the tolerance, then display
# the hits in sorted order.
y_hits_no_gap = db.y_dawg.fuzzy_search(spectra[1].spectrum, 0, 20)
sorted(y_hits_no_gap)

['AATKFI',
 'AFTLKA',
 'AIVGYK',
 'ALTFAK',
 'DFKIK',
 'DKFKL',
 'DKFLK',
 'FKKDL',
 'FSTEV',
 'FSVKAV',
 'GFKVSI',
 'GKFVLS',
 'IKQVY',
 'IKQYV',
 'IKSVFG',
 'IVQKY',
 'KDIVS',
 'KDKFL',
 'KFEKV',
 'KFEVK',
 'KFKVE',
 'KFVLSG',
 'KGSIFV',
 'KIFDK',
 'KKFDL',
 'KKFLD',
 'KLVQY',
 'KYLLN',
 'LAAFTK',
 'LAKTAF',
 'LFKDK',
 'LKYVQ',
 'LLNYK',
 'LLPPPN',
 'LPQPVP',
 'LVQYK',
 'MKIFD',
 'MSVFGK',
 'MYKLL',
 'NIYKL',
 'NKYLL',
 'NLPLPP',
 'NNPELA',
 'NYLIK',
 'PELLN',
 'PSPVAPA',
 'PVAPAPV',
 'PVPAAPV',
 'PVPPLQ',
 'SVFGKL',
 'VAPAPVP',
 'VAPPPAV',
 'VLQKY',
 'VPAAPVP',
 'VPSPVAP',
 'VQLYK',
 'YGVLKA',
 'YKLLN',
 'YLAVKG',
 'YLIKN',
 'YLKNL',
 'YLKQV',
 'YLLKN',
 'YLTIP',
 'YVIQK']