# Test Bed


In [2]:
import os
import sys
# Put the parent and grandparent directories on sys.path (presumably so the
# `src` package imported below can be found when running from this folder).
for _relative in ('..', '../..'):
    module_path = os.path.abspath(os.path.join(_relative))
    if module_path not in sys.path:
        sys.path.append(module_path)
    
import numpy as np

from src.gen_spectra import gen_spectrum, get_precursor
from src.objects import Spectrum
from src.utils import insort_by_index, make_sparse_array, ppm_to_da, overlap_intervals
from src.scoring import scoring

# from src import main

from src.params import OUTPUT_DIRECTORY

from collections import namedtuple

In [2]:
# Run hyped search with the params.
# `main` is imported here because the import cell has `from src import main`
# commented out; without this line the call below raises NameError on a
# fresh kernel.
from src import main

# NOTE(review): the single `params` field receives the string 'True', not the
# boolean True -- confirm which one main.main expects.
parameters = namedtuple('parameteres', 'params')
main.main(parameters('True'))

Loading database...
Done
Loading spectra...
Done
Making the protein mass set...
On protein 279/279 [100%]
Sorting the set of protein masses...
Done. Length of the list: 5674100
Time to build: 32.9628 seconds
Time to do merge: 1.7895 seconds
Creating an alignment for 9/9 [100%]
Finished search. Writting results to /Users/zacharymcgrath/Desktop/Experiment output/filtered_NOD2_E3_AUG_26/...
Could not make an alignment for 1/9 spectra (11%)


In [3]:
# Load summary.json from the output directory.
import json

# A context manager closes the file handle as soon as parsing finishes
# (the bare open() call left it open for the life of the kernel).
# NOTE(review): plain concatenation assumes OUTPUT_DIRECTORY ends with a path
# separator -- confirm against src.params.
with open(OUTPUT_DIRECTORY + 'summary.json', 'r') as summary_file:
    all_results = json.load(summary_file)

In [4]:
# Print one header per spectrum key, followed by a line for each of its
# alignments showing the sequence and its b / y scores.
for i in all_results:
    print(f'---------- Alignments for spectrum {i} ----------')
    alignments = all_results[i]['alignments']
    for a in alignments:
        print(f'{a["sequence"]} \t b score: {a["b_score"]} \t y score: {a["y_score"]}')

---------- Alignments for spectrum 0 ----------
LPVNSPMTKGD 	 b score: 0 	 y score: 0
---------- Alignments for spectrum 1 ----------
GEAIVGYK 	 b score: 3 	 y score: 6
DAAIVGYK 	 b score: 3 	 y score: 6
EGALVGYK 	 b score: 3 	 y score: 6
---------- Alignments for spectrum 2 ----------
---------- Alignments for spectrum 3 ----------
DPQVEQLEL 	 b score: 7 	 y score: 2
FVDLTMPYS 	 b score: 0 	 y score: 0
---------- Alignments for spectrum 4 ----------
DLQTLALWSRM 	 b score: 5 	 y score: 8
DLQTLALEFGLGG 	 b score: 5 	 y score: 7
DLQTLALGQIFD 	 b score: 5 	 y score: 6
---------- Alignments for spectrum 5 ----------
DLTEYLSRFGEVV 	 b score: 5 	 y score: 5
EVTEYLSRFGEVV 	 b score: 5 	 y score: 5
DLTEYLRDTFAAL 	 b score: 3 	 y score: 4
---------- Alignments for spectrum 6 ----------
ELTNLEVE 	 b score: 6 	 y score: 2
ELTNLEDL 	 b score: 6 	 y score: 2
TLNELEDL 	 b score: 5 	 y score: 2
---------- Alignments for spectrum 7 ----------
SSSSLAAPASLFPLKLLYKIVRGDQ 	 b score: 5 	 y score: 1
-------

In [8]:
# Three short example sequences used by the cells that follow.
seq1, seq2, seq3 = 'MALWARM', 'GGGQQUVRS', 'NRGQWEVE'


In [22]:
# build the tolerance window around a single value
def make_boundary(val, ppm_tol=20):
    """Return the [lower, upper] tolerance window around ``val``.

    Args:
        val: numeric value to centre the window on (the cells below pass
            entries of a generated spectrum).
        ppm_tol: tolerance in parts per million forwarded to ``ppm_to_da``.
            Defaults to 20, the value that used to be hard-coded here.

    Returns:
        A two-element list ``[val - tol, val + tol]`` where ``tol`` is the
        value returned by ``ppm_to_da(val, ppm_tol)``; each bound is rounded
        to four decimal places.
    """
    da_tol = ppm_to_da(val, ppm_tol)
    return [round(val - da_tol, 4), round(val + da_tol, 4)]

In [23]:
# Collect a (window, subsequence) pair for every b-ion and y-ion entry of
# each example sequence.
boundaries = []
for seq in (seq1, seq2, seq3):
    b_spec = gen_spectrum(seq, ion='b')['spectrum']
    y_spec = gen_spectrum(seq, ion='y')['spectrum']

    for length in range(1, len(seq) + 1):
        # The b entry is labelled with the prefix of this length, the y entry
        # with the suffix of the same length.
        prefix, suffix = seq[:length], seq[-length:]
        boundaries.append((make_boundary(b_spec[length - 1]), prefix))
        boundaries.append((make_boundary(y_spec[length - 1]), suffix))

# Order the pairs by the lower bound of their window (stable, so ties keep
# insertion order).
boundaries.sort(key=lambda pair: pair[0][0])

In [24]:
# Display the sorted list of (window, subsequence) pairs.
boundaries

[([58.0276, 58.0299], 'G'),
 ([106.0477, 106.052], 'S'),
 ([115.0479, 115.0525], 'GG'),
 ([115.0479, 115.0525], 'N'),
 ([132.0451, 132.0504], 'M'),
 ([148.0575, 148.0634], 'E'),
 ([150.0553, 150.0613], 'M'),
 ([172.0682, 172.0751], 'GGG'),
 ([203.0808, 203.0889], 'MA'),
 ([247.1239, 247.1338], 'VE'),
 ([262.1457, 262.1562], 'RS'),
 ([271.1459, 271.1567], 'NR'),
 ([300.1242, 300.1362], 'GGGQ'),
 ([306.1533, 306.1656], 'RM'),
 ([316.1626, 316.1753], 'MAL'),
 ([328.1662, 328.1793], 'NRG'),
 ([361.2122, 361.2266], 'VRS'),
 ([376.1639, 376.179], 'EVE'),
 ([377.189, 377.2041], 'ARM'),
 ([428.1803, 428.1974], 'GGGQQ'),
 ([456.2222, 456.2405], 'NRGQ'),
 ([502.2382, 502.2583], 'MALW'),
 ([512.1628, 512.1833], 'UVRS'),
 ([562.2395, 562.262], 'WEVE'),
 ([563.2646, 563.2871], 'WARM'),
 ([573.2739, 573.2968], 'MALWA'),
 ([579.1309, 579.154], 'GGGQQU'),
 ([640.2188, 640.2444], 'QUVRS'),
 ([642.2978, 642.3235], 'NRGQW'),
 ([676.3464, 676.3735], 'LWARM'),
 ([678.1973, 678.2244], 'GGGQQUV'),
 ([690.295

In [25]:
# Literal list of ([lower, upper], label) pairs. It appears to be a hand-edited
# copy of the `boundaries` output in which entries sharing a window have their
# labels joined with ', ' (e.g. 'GG, N').
b = [([58.0276, 58.0299], 'G'),
 ([106.0477, 106.052], 'S'),
 ([115.0479, 115.0525], 'GG, N'),
 ([132.0451, 132.0504], 'M'),
 ([148.0575, 148.0634], 'E'),
 ([150.0553, 150.0613], 'M'),
 ([172.0682, 172.0751], 'GGG'),
 ([203.0808, 203.0889], 'MA'),
 ([247.1239, 247.1338], 'VE'),
 ([262.1457, 262.1562], 'RS'),
 ([271.1459, 271.1567], 'NR'),
 ([300.1242, 300.1362], 'GGGQ'),
 ([306.1533, 306.1656], 'RM'),
 ([316.1626, 316.1753], 'MAL'),
 ([328.1662, 328.1793], 'NRG'),
 ([361.2122, 361.2266], 'VRS'),
 ([376.1639, 376.179], 'EVE'),
 ([377.189, 377.2041], 'ARM'),
 ([428.1803, 428.1974], 'GGGQQ'),
 ([456.2222, 456.2405], 'NRGQ'),
 ([502.2382, 502.2583], 'MALW'),
 ([512.1628, 512.1833], 'UVRS'),
 ([562.2395, 562.262], 'WEVE'),
 ([563.2646, 563.2871], 'WARM'),
 ([573.2739, 573.2968], 'MALWA'),
 ([579.1309, 579.154], 'GGGQQU'),
 ([640.2188, 640.2444], 'QUVRS'),
 ([642.2978, 642.3235], 'NRGQW'),
 ([676.3464, 676.3735], 'LWARM'),
 ([678.1973, 678.2244], 'GGGQQUV'),
 ([690.2955, 690.3231], 'QWEVE'),
 ([729.3719, 729.4011], 'MALWAR'),
 # NOTE(review): the next window is ~0.096 wide, whereas every other entry is
 # roughly 40 ppm of its value wide (e.g. ~0.029 for the 729.37 entry above).
 # It looks like two separate windows were merged by hand -- confirm that the
 # windows for 'GQWEVE' and 'ALWARM' really overlap.
 ([747.3158, 747.412], 'GQWEVE, ALWARM'),
 ([768.2748, 768.3055], 'QQUVRS'),
 ([771.3378, 771.3687], 'NRGQWE'),
 ([825.2951, 825.3282], 'GQQUVRS'),
 ([834.2953, 834.3287], 'GGGQQUVR'),
 ([860.4098, 860.4442], 'MALWARM'),
 ([870.4043, 870.4391], 'NRGQWEV'),
 ([878.42, 878.4551], 'MALWARM'),
 ([882.3155, 882.3508], 'GGQQUVRS'),
 ([903.4138, 903.45], 'RGQWEVE'),
 ([921.3256, 921.3624], 'GGGQQUVRS'),
 ([939.3358, 939.3734], 'GGGQQUVRS'),
 ([999.4443, 999.4843], 'NRGQWEVE'),
 ([1017.4545, 1017.4952], 'NRGQWEVE')]

In [27]:
# Key every label by its window rendered as "<lower>-<upper>" text.
mappings = dict(
    ('-'.join(map(str, window)), label)
    for window, label in b
)
mappings

{'58.0276-58.0299': 'G',
 '106.0477-106.052': 'S',
 '115.0479-115.0525': 'GG, N',
 '132.0451-132.0504': 'M',
 '148.0575-148.0634': 'E',
 '150.0553-150.0613': 'M',
 '172.0682-172.0751': 'GGG',
 '203.0808-203.0889': 'MA',
 '247.1239-247.1338': 'VE',
 '262.1457-262.1562': 'RS',
 '271.1459-271.1567': 'NR',
 '300.1242-300.1362': 'GGGQ',
 '306.1533-306.1656': 'RM',
 '316.1626-316.1753': 'MAL',
 '328.1662-328.1793': 'NRG',
 '361.2122-361.2266': 'VRS',
 '376.1639-376.179': 'EVE',
 '377.189-377.2041': 'ARM',
 '428.1803-428.1974': 'GGGQQ',
 '456.2222-456.2405': 'NRGQ',
 '502.2382-502.2583': 'MALW',
 '512.1628-512.1833': 'UVRS',
 '562.2395-562.262': 'WEVE',
 '563.2646-563.2871': 'WARM',
 '573.2739-573.2968': 'MALWA',
 '579.1309-579.154': 'GGGQQU',
 '640.2188-640.2444': 'QUVRS',
 '642.2978-642.3235': 'NRGQW',
 '676.3464-676.3735': 'LWARM',
 '678.1973-678.2244': 'GGGQQUV',
 '690.2955-690.3231': 'QWEVE',
 '729.3719-729.4011': 'MALWAR',
 '747.3158-747.412': 'GQWEVE, ALWARM',
 '768.2748-768.3055': 'QQ

In [8]:
# Show, in ascending order, every value in the 'spectrum' entry that
# gen_spectrum returns for 'ALWARM' when no `ion` argument is given.
sorted(gen_spectrum('ALWARM')['spectrum'])

[36.525833435,
 72.044390435,
 75.53280128499999,
 93.067865435,
 150.058326135,
 153.58335678499998,
 185.12845443499998,
 186.10752193500002,
 189.10191378499997,
 221.626078935,
 282.14157028499994,
 299.676634435,
 306.15943713499996,
 338.68360228499995,
 365.196876935,
 371.207767435,
 374.20215928499994,
 377.19655113499994,
 442.244881435,
 563.2758641349999,
 598.345992435,
 676.3599281349999,
 729.386477435,
 747.3970421349999]