## 04 - Understanding the Results

Let's take a look at the outputs generated in part 03. First, we once again need to load the libraries we'll be using.

In [18]:
'''
In order to import pyhxexpress and test_config from a directory different than our 
current working directory we need to add the location of those files to the system path.

In this case, I am working in the Documentation folder, pyHXexpress and the data are both up a level
'''

import sys
import os
import pathlib

# Locate the parent of the working directory (pyHXexpress and the data are both up a level)
# and the Bimodal_HDX_Data folder inside it, then register both on the import path.
parent_dir = pathlib.Path(os.getcwd()).parent
hxex_path = os.fspath(parent_dir)
data_path = os.path.join(parent_dir, 'Bimodal_HDX_Data')
sys.path.extend([hxex_path, data_path])

import numpy as np, pandas as pd
import importlib
# Show every column, and the full contents of each cell, when a DataFrame is displayed.
for display_option in ('display.max_columns', 'display.max_colwidth'):
    pd.set_option(display_option, None)

import pyhxexpress.hxex as hxex
import test_config as config

def hxex_reload():
    """Reload the hxex and config modules, then point hxex at the reloaded config.

    Call this after editing either module so the changes are picked up
    without restarting the kernel.
    """
    for module in (hxex, config):
        importlib.reload(module)
    hxex.config = config

hxex_reload()

Let's read in the outputs we generated in part 03. These are the metadf_asrun, data_fits, and fitparamsAll files.

We didn't save the raw and peak-picked data to separate CSV files, but we can quickly gather those using the 'get_data()' function.

In [19]:
# List the files in the output directory. iterdir() yields entries in arbitrary
# order, so sort the names to keep the listing reproducible between runs.
sorted(item.name for item in pathlib.Path(os.path.join(os.getcwd(), config.Output_DIR)).iterdir())

['data_fits_asrun_16Jan2025.csv',
 'fitparamsAll_asrun_16Jan2025.csv',
 'hdxms_params_16Jan2025.py',
 'metadf_asrun_16Jan2025.csv']

In [23]:
# Load the three CSV outputs from the output directory. Each read drops the
# 'Index' column stored in the file.
output_path = config.Output_DIR
metadf_run = pd.read_csv(os.path.join(output_path, 'metadf_asrun_16Jan2025.csv')).drop('Index', axis=1)
# The file in the output directory is 'data_fits_asrun_16Jan2025.csv'
# (the previous name, 'data_fits16Jan2025.csv', does not exist there).
datafits = pd.read_csv(os.path.join(output_path, 'data_fits_asrun_16Jan2025.csv')).drop('Index', axis=1)
fitparams = pd.read_csv(os.path.join(output_path, 'fitparamsAll_asrun_16Jan2025.csv')).drop('Index', axis=1)

# The peak-picked and raw data were not saved to CSV, so gather them again
# from the run metadata.
deutdata, rawdata = hxex.get_data(metadf_run)

In [24]:
# One row per file that was run; the 'polymodal' column summarizes the timepoints that are polymodal
display(metadf_run)

Unnamed: 0,file,sample,start_seq,end_seq,peptide_range,charge,peptide,polymodal,dataset_run
0,Angio_2_HI.xlsx,AngioII_HI,1,8,0001-0008,2,DRVYIHPF,60.0 120.0 180.0 240.0 300.0 360.0 420.0 480.0 540.0 600.0 660.0 720.0 780.0 840.0 900.0 960.0 1020.0 1080.0 1140.0 1200.0 1260.0,Yes
1,GluFib_2_HI.xlsx,GluFib_HI,1,14,0001-0014,2,EGVNDNEEGFFSAR,240.0 300.0 360.0 420.0 480.0 540.0 600.0 660.0 720.0 780.0 840.0 900.0 960.0 1020.0 1080.0 1140.0 1200.0 1260.0,Yes


In [25]:
'''
This is the peak-picked data for every peptide/timepoint/charge/replicate corresponding to the files in metadf_run.
There are additional columns, which may include 'env_width', 'env_symm', and 'TD_env_width'; these will be discussed
further in an advanced topics tutorial. They are 'Features' computed from the unfit peak-picked data that I have used
to train an ML model for predicting whether a single population or multiple populations are present.
'''
display(deutdata)

Unnamed: 0,mz,Intensity,n_deut,env_width,env_symm,max_namides,time,data_id,sample,peptide,charge,rep,peptide_range,start_seq,end_seq,file,time_idx
0,523.774534,574000.0,0,2.568142,2.0,6,0.0,0,AngioII_HI,DRVYIHPF,2,1,0001-0008,1,8,Angio_2_HI.xlsx,0
1,524.276211,352400.0,1,2.568142,2.0,6,0.0,0,AngioII_HI,DRVYIHPF,2,1,0001-0008,1,8,Angio_2_HI.xlsx,0
2,524.777889,103200.0,2,2.568142,2.0,6,0.0,0,AngioII_HI,DRVYIHPF,2,1,0001-0008,1,8,Angio_2_HI.xlsx,0
3,525.280297,21230.0,3,2.568142,2.0,6,0.0,0,AngioII_HI,DRVYIHPF,2,1,0001-0008,1,8,Angio_2_HI.xlsx,0
4,525.782705,4580.0,4,2.568142,2.0,6,0.0,0,AngioII_HI,DRVYIHPF,2,1,0001-0008,1,8,Angio_2_HI.xlsx,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
764,793.384753,31890.0,15,15.138171,1.1,13,1260.0,1,GluFib_HI,EGVNDNEEGFFSAR,2,1,0001-0014,1,14,GluFib_2_HI.xlsx,21
765,793.887892,11210.0,16,15.138171,1.1,13,1260.0,1,GluFib_HI,EGVNDNEEGFFSAR,2,1,0001-0014,1,14,GluFib_2_HI.xlsx,21
766,794.391030,3765.0,17,15.138171,1.1,13,1260.0,1,GluFib_HI,EGVNDNEEGFFSAR,2,1,0001-0014,1,14,GluFib_2_HI.xlsx,21
767,794.894169,1515.0,18,15.138171,1.1,13,1260.0,1,GluFib_HI,EGVNDNEEGFFSAR,2,1,0001-0014,1,14,GluFib_2_HI.xlsx,21


In [26]:
# This is the raw spectral data (mz vs. Intensity) corresponding to the files in metadf_run
display(rawdata)

Unnamed: 0,index,mz,Intensity,time,data_id,sample,peptide,charge,rep,peptide_range,start_seq,end_seq,file,time_idx
0,0,523.435,277.00,0.0,0,AngioII_HI,DRVYIHPF,2,1,0001-0008,1,8,Angio_2_HI.xlsx,0
1,1,523.445,236.80,0.0,0,AngioII_HI,DRVYIHPF,2,1,0001-0008,1,8,Angio_2_HI.xlsx,0
2,2,523.455,164.80,0.0,0,AngioII_HI,DRVYIHPF,2,1,0001-0008,1,8,Angio_2_HI.xlsx,0
3,3,523.465,126.00,0.0,0,AngioII_HI,DRVYIHPF,2,1,0001-0008,1,8,Angio_2_HI.xlsx,0
4,4,523.475,107.70,0.0,0,AngioII_HI,DRVYIHPF,2,1,0001-0008,1,8,Angio_2_HI.xlsx,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31954,799,795.230,108.30,1260.0,1,GluFib_HI,EGVNDNEEGFFSAR,2,1,0001-0014,1,14,GluFib_2_HI.xlsx,21
31955,800,795.243,86.75,1260.0,1,GluFib_HI,EGVNDNEEGFFSAR,2,1,0001-0014,1,14,GluFib_2_HI.xlsx,21
31956,801,795.255,62.75,1260.0,1,GluFib_HI,EGVNDNEEGFFSAR,2,1,0001-0014,1,14,GluFib_2_HI.xlsx,21
31957,802,795.267,68.00,1260.0,1,GluFib_HI,EGVNDNEEGFFSAR,2,1,0001-0014,1,14,GluFib_2_HI.xlsx,21


In [27]:
'''
This dataframe contains some of the values we're usually most interested in, such as Dabs_i, pop_i, and the centroids.
Dabs_i is the TD-UN corrected value for the deuterium uptake of population i, and pop_i is the corresponding population.

The p-value for any fit with fit_pops > min_pops should be less than the specified 'Ncurve_p_accept' value. The p-value
will be 1.0 when fit_pops = min_pops, meaning no additional population passed the p-value test.
'''
display(datafits)

Unnamed: 0,data_id,sample,peptide,peptide_range,start_seq,end_seq,charge,time,time_idx,rep,max_Int,centroid,env_width,env_symm,max_namides,UN_TD_corr,fit_pops,p-value,centroid_1,Dabs_1,Dabs_std_1,pop_1,pop_std_1,centroid_2,Dabs_2,Dabs_std_2,pop_2,pop_std_2,centroid_3,Dabs_3,Dabs_std_3,pop_3,pop_std_3,centroid_4,Dabs_4,Dabs_std_4,pop_4,pop_std_4,solution_npops
0,2,AngioII_HI,DRVYIHPF,0001-0008,1,8,2,0.0,0,1,574000.0,524.089187,2.568142,2.0,6,0.998088,1,1.0,524.097421,0.001366,0.003365,1.0,0.0,,,,,,,,,,,,,,,,1
1,2,AngioII_HI,DRVYIHPF,0001-0008,1,8,2,60.0,1,1,302100.0,524.914705,5.455503,1.251208,6,0.998088,2,3.963504e-07,524.579653,0.959922,0.174921,0.407486,0.221568,525.106505,2.154768,0.402732,0.592514,0.221568,,,,,,,,,,,1
2,2,AngioII_HI,DRVYIHPF,0001-0008,1,8,2,120.0,2,1,258000.0,525.673768,6.604516,1.1,6,0.998088,2,0.007924798,525.669468,2.854909,0.273129,0.590964,0.244644,525.682373,3.496138,0.298648,0.409036,0.244644,,,,,,,,,,,1
3,2,AngioII_HI,DRVYIHPF,0001-0008,1,8,2,180.0,3,1,260500.0,526.46316,6.505287,1.1,6,0.998088,2,7.716762e-05,526.307576,4.06406,0.175061,0.202818,0.187858,526.479505,4.901264,0.333078,0.797182,0.187858,,,,,,,,,,,1
4,2,AngioII_HI,DRVYIHPF,0001-0008,1,8,2,240.0,4,1,264300.0,525.442186,8.882579,2.0,6,0.998088,2,4.414247e-13,524.113171,0.029194,0.012229,0.538517,0.00883,526.987728,5.740685,0.049646,0.461483,0.00883,,,,,,,,,,,2
5,2,AngioII_HI,DRVYIHPF,0001-0008,1,8,2,300.0,5,1,304100.0,524.468802,4.656814,2.0,6,0.998088,2,1.258105e-12,524.147822,0.159126,0.075714,0.65094,0.074691,524.91847,1.852431,0.260007,0.34906,0.074691,,,,,,,,,,,2
6,2,AngioII_HI,DRVYIHPF,0001-0008,1,8,2,360.0,6,1,264200.0,524.855596,6.485148,2.0,6,0.998088,2,2.220446e-16,524.138327,0.086577,0.023094,0.553383,0.01305,525.716247,3.241441,0.06278,0.446617,0.01305,,,,,,,,,,,2
7,2,AngioII_HI,DRVYIHPF,0001-0008,1,8,2,420.0,7,1,205400.0,525.166911,7.807363,2.0,6,0.998088,2,5.222489e-13,524.112965,0.033207,0.023139,0.538667,0.008634,526.359251,4.498539,0.056842,0.461333,0.008634,,,,,,,,,,,2
8,2,AngioII_HI,DRVYIHPF,0001-0008,1,8,2,480.0,8,1,134900.0,525.940605,9.390881,1.24685,6,0.998088,2,6.661338e-16,524.882038,1.57376,0.069124,0.487138,0.015255,526.921444,5.620475,0.071327,0.512862,0.015255,,,,,,,,,,,2
9,2,AngioII_HI,DRVYIHPF,0001-0008,1,8,2,540.0,9,1,241500.0,525.335343,6.846569,1.157764,6,0.998088,2,2.015421e-11,524.534347,0.902748,0.324543,0.200893,0.117602,525.488406,2.829228,0.175716,0.799107,0.117602,,,,,,,,,,,2


In [28]:
'''
The fitparams dataframe contains all of the fit variables determined during the full run. Each fit spectrum will have
fit_pops + Nboot entries. Consider the first fit spectrum: the first row is the n=1 fit and the second row is
the n=2 fit. The n=2 fit did not sufficiently reduce the residual sum of squares (rss) value, so it failed the p-value
test for adding another population and the Nboot fits are performed with n=1 population.
'''
display(fitparams)

Unnamed: 0,data_id,sample,peptide,peptide_range,start_seq,end_seq,charge,time,time_idx,rep,ncurves,nboot,rss,Fit_Params,solution_npops,p-value
0,2,AngioII_HI,DRVYIHPF,0001-0008,1,8,2,0.0,0,1,1,0,1.600046e-05,0.005248293808740953 0.006232132327061506 7.247133588408689e-10 1.0,1,1.0
1,2,AngioII_HI,DRVYIHPF,0001-0008,1,8,2,0.0,0,1,2,0,1.600046e-05,0.0052482953334321205 5.843965573636405e-07 3.074487567966645e-05 6.679750869625749e-10 7.518323863954792e-06 0.9999993739455897 6.260544103549107e-07,1,1.0
2,2,AngioII_HI,DRVYIHPF,0001-0008,1,8,2,0.0,0,1,1,1,1.190136e-06,0.0031182928733498056 2.7823501225808024e-06 1.480230414300589e-05 1.0,1,1.0
3,2,AngioII_HI,DRVYIHPF,0001-0008,1,8,2,0.0,0,1,1,2,1.145674e-06,0.00473220641458599 0.00016432813901643274 5.1399003025497666e-05 1.0,1,1.0
4,2,AngioII_HI,DRVYIHPF,0001-0008,1,8,2,0.0,0,1,1,3,1.146417e-06,0.0046673789002506455 9.993869623402041e-11 0.00646771180124329 1.0,1,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1054,5,GluFib_HI,EGVNDNEEGFFSAR,0001-0014,1,14,2,1000000.0,22,1,1,16,1.355799e-07,0.0036348683952800802 13.792398467156978 0.8655934527602004 1.0,1,1.0
1055,5,GluFib_HI,EGVNDNEEGFFSAR,0001-0014,1,14,2,1000000.0,22,1,1,17,1.890514e-07,0.004142566450593442 13.723384200513266 0.8658805368791417 1.0,1,1.0
1056,5,GluFib_HI,EGVNDNEEGFFSAR,0001-0014,1,14,2,1000000.0,22,1,1,18,1.336577e-07,1.2785796081830633e-10 13.85928944539912 0.8621588802517847 1.0,1,1.0
1057,5,GluFib_HI,EGVNDNEEGFFSAR,0001-0014,1,14,2,1000000.0,22,1,1,19,8.653029e-08,0.0034816876525840526 13.76727858610381 0.8657941179939492 1.0,1,1.0


In [30]:
# Show every fitparams row for data_id 5 at time index 15.
is_target_spectrum = fitparams['data_id'].eq(5) & fitparams['time_idx'].eq(15)
fitparams.loc[is_target_spectrum]

Unnamed: 0,data_id,sample,peptide,peptide_range,start_seq,end_seq,charge,time,time_idx,rep,ncurves,nboot,rss,Fit_Params,solution_npops,p-value
873,5,GluFib_HI,EGVNDNEEGFFSAR,0001-0014,1,14,2,900.0,15,1,1,0,9.097333e-05,3.766990584286857e-16 15.599999999978374 0.38541220168685336 1.0,2,1.0
874,5,GluFib_HI,EGVNDNEEGFFSAR,0001-0014,1,14,2,900.0,15,1,2,0,1.349722e-06,1.0910223601038284e-32 5.66237463499632 14.416880273180023 0.4836170303793014 0.43780587086593015 0.12851396792287617 0.8714860320771238,2,1.933939e-09
875,5,GluFib_HI,EGVNDNEEGFFSAR,0001-0014,1,14,2,900.0,15,1,3,0,1.117134e-06,1.42628934347736e-38 15.599992290756502 15.59970737146241 11.65374906737242 0.23998093009955082 0.5920424785902717 0.5556923379143118 0.2639743627592453 0.040507029599364405 0.6955186076413903,2,1.405556
876,5,GluFib_HI,EGVNDNEEGFFSAR,0001-0014,1,14,2,900.0,15,1,2,1,6.341739e-07,0.005614765718549334 15.599979393677533 13.147396389247158 0.2593497609166821 0.5171501968287218 0.31435718633973125 0.6856428136602687,2,1.933939e-09
877,5,GluFib_HI,EGVNDNEEGFFSAR,0001-0014,1,14,2,900.0,15,1,2,2,4.12086e-07,0.004006923873102697 6.903806258061973 15.35493614377187 0.4432357445163378 0.4128378134320934 0.12187564054058032 0.8781243594594197,2,1.933939e-09
878,5,GluFib_HI,EGVNDNEEGFFSAR,0001-0014,1,14,2,900.0,15,1,2,3,1.190116e-06,0.0 6.45261459688543 15.599999999999863 0.5659912377504646 0.416545164569482 0.17403773615699855 0.8259622638430014,2,1.933939e-09
879,5,GluFib_HI,EGVNDNEEGFFSAR,0001-0014,1,14,2,900.0,15,1,2,4,1.24709e-07,4.185181204841064e-14 3.8215534613665945 14.782435265716403 0.6494666290459262 0.4222804800804558 0.10134893078028598 0.898651069219714,2,1.933939e-09
880,5,GluFib_HI,EGVNDNEEGFFSAR,0001-0014,1,14,2,900.0,15,1,2,5,2.628625e-07,0.0014460203294066794 15.599999999992074 15.599999999999998 0.1687669709950145 0.4037720229856819 0.12380722654646811 0.8761927734535319,2,1.933939e-09
881,5,GluFib_HI,EGVNDNEEGFFSAR,0001-0014,1,14,2,900.0,15,1,2,6,2.921887e-07,0.008525394147568947 4.000000350415311 15.596235469416936 0.6261102147073634 0.40208331938973635 0.11035799018763169 0.8896420098123684,2,1.933939e-09
882,5,GluFib_HI,EGVNDNEEGFFSAR,0001-0014,1,14,2,900.0,15,1,2,7,1.372477e-06,0.012853088117341286 5.822484186739096 12.003421690155095 0.5360340878831488 0.5419042005720691 0.22868424575418433 0.7713157542458156,2,1.933939e-09
