# Test Bed


In [5]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
module_path = os.path.abspath(os.path.join('../..'))
if module_path not in sys.path:
    sys.path.append(module_path)
    
import numpy as np

from src.gen_spectra import gen_spectrum, get_precursor
from src.objects import Spectrum
from src.utils import insort_by_index, make_sparse_array, ppm_to_da, overlap_intervals
from src.scoring import scoring

from src import main

from src.params import OUTPUT_DIRECTORY

from collections import namedtuple, defaultdict

In [2]:
# run hyped search with the params 
parameters = namedtuple('parameters', 'params')
main.main(parameters('True'))

Loading database...
Done
Loading spectra...
Done
Making the protein mass set...
On protein 279/279 [100%]
Sorting the set of protein masses...
Done. Length of the list: 5674100
Time to build: 32.9628 seconds
Time to do merge: 1.7895 seconds
Creating an alignment for 9/9 [100%]
Finished search. Writting results to /Users/zacharymcgrath/Desktop/Experiment output/filtered_NOD2_E3_AUG_26/...
Could not make an alignment for 1/9 spectra (11%)


In [3]:
# load the values
import json
all_results = json.load(open(OUTPUT_DIRECTORY + 'summary.json', 'r'))

In [4]:
for i, entry in all_results.items():
    print(f'---------- Alignments for spectrum {i} ----------')
    for a in entry['alignments']:
        print(f'{a["sequence"]} \t b score: {a["b_score"]} \t y score: {a["y_score"]}')

---------- Alignments for spectrum 0 ----------
LPVNSPMTKGD 	 b score: 0 	 y score: 0
---------- Alignments for spectrum 1 ----------
GEAIVGYK 	 b score: 3 	 y score: 6
DAAIVGYK 	 b score: 3 	 y score: 6
EGALVGYK 	 b score: 3 	 y score: 6
---------- Alignments for spectrum 2 ----------
---------- Alignments for spectrum 3 ----------
DPQVEQLEL 	 b score: 7 	 y score: 2
FVDLTMPYS 	 b score: 0 	 y score: 0
---------- Alignments for spectrum 4 ----------
DLQTLALWSRM 	 b score: 5 	 y score: 8
DLQTLALEFGLGG 	 b score: 5 	 y score: 7
DLQTLALGQIFD 	 b score: 5 	 y score: 6
---------- Alignments for spectrum 5 ----------
DLTEYLSRFGEVV 	 b score: 5 	 y score: 5
EVTEYLSRFGEVV 	 b score: 5 	 y score: 5
DLTEYLRDTFAAL 	 b score: 3 	 y score: 4
---------- Alignments for spectrum 6 ----------
ELTNLEVE 	 b score: 6 	 y score: 2
ELTNLEDL 	 b score: 6 	 y score: 2
TLNELEDL 	 b score: 5 	 y score: 2
---------- Alignments for spectrum 7 ----------
SSSSLAAPASLFPLKLLYKIVRGDQ 	 b score: 5 	 y score: 1
-------

In [4]:
basic_prots = [
    ('Protein 1', 'MALWARM'), 
    ('Protein 2', 'GGGQQUVRS'), 
    ('Protein 3', 'NRGQWEVE')
]

In [21]:
kmer_mapping = defaultdict(list)

for prot_name, prot_seq in basic_prots:
    for i in range(1, len(prot_seq)):
        for j in range(len(prot_seq) - i + 1):
            kmer_mapping[prot_seq[j:j+i]].append(prot_name)

In [22]:
for k, v in kmer_mapping.items():
    kmer_mapping[k] = list(set(v))
    
kmer_mapping

defaultdict(list,
            {'M': ['Protein 1'],
             'A': ['Protein 1'],
             'L': ['Protein 1'],
             'W': ['Protein 3', 'Protein 1'],
             'R': ['Protein 2', 'Protein 3', 'Protein 1'],
             'MA': ['Protein 1'],
             'AL': ['Protein 1'],
             'LW': ['Protein 1'],
             'WA': ['Protein 1'],
             'AR': ['Protein 1'],
             'RM': ['Protein 1'],
             'MAL': ['Protein 1'],
             'ALW': ['Protein 1'],
             'LWA': ['Protein 1'],
             'WAR': ['Protein 1'],
             'ARM': ['Protein 1'],
             'MALW': ['Protein 1'],
             'ALWA': ['Protein 1'],
             'LWAR': ['Protein 1'],
             'WARM': ['Protein 1'],
             'MALWA': ['Protein 1'],
             'ALWAR': ['Protein 1'],
             'LWARM': ['Protein 1'],
             'MALWAR': ['Protein 1'],
             'ALWARM': ['Protein 1'],
             'G': ['Protein 2', 'Protein 3'],
             'Q': ['Pr