## 02 - The configuration file

pyHXExpress reads in default configuration values from the included config.py file. <br>
Users should create their own user_config.py (or another .py filename) with these settings updated appropriately. 
These settings are referred to in pyhxexpress with a 'config.' prefix. 

In [None]:
### Example user_config.py for the test data sets 

'''
This top bit is so we can create output directories if needed and use the current date in file names.
'''
import os
from datetime import datetime
# Date stamp (day, abbreviated month, year - e.g. 26Feb2024) used below to
# build a dated output directory name.
now = datetime.now()
date = now.strftime("%d%b%Y")

'''
# These three lines of code should be in your Jupyter Notebook or python script.
# They will update the default config parameters to the user config parameters.
# Here a file named test_config.py (in the working directory) contains the configuration parameters. 
# If, for example, the file were called user_params.py 
#               the code should instead be: import user_params as config
# pyhxexpress is imported as hxex for a convenient shorthand nickname
 
import pyhxexpress.hxex as hxex 
import test_config as config
hxex.config = config
'''
##########################################
'''Settings for Test Data Sets'''
##########################################

# Save user_config to an hdxms_params_$.py file in Data_DIR; that file can
# then be used as the user_config.py file.
WRITE_PARAMS = True
# Don't create a new filename if the file already exists.
Allow_Overwrite = True

Data_DIR = '/home/tuttle/data/HDX-MS/pyHXExpress/Bimodal_HDX_Data'
# Location where output figures and dataframes will be saved. The trailing ''
# makes the joined path end with a path separator.
Output_DIR = os.path.join(Data_DIR, 'output_' + date, '')
# To create the directory if needed, uncomment the following command:
# os.makedirs(Output_DIR, exist_ok=True)

# Only for use with a specific set of experimental test data from
# Mike Guttman's lab.
Test_Data = True
# Set to (1) for HX-Express tabular data or (2) for SpecExport data.
Data_Type = 1
# When run_hdx_fits() is complete, save all the run spectra into 2 files:
# peakpicked and rawdata.
Save_Spectra = False

# The following three parameters control what data will be analyzed.
# Recommended: leave process_ALL as True and use the hxex.filter_df()
# function to choose which data to analyze.
#
# If True, the program will analyze all available data in the specified
# Data_DIR.
process_ALL = True
# User can specify a subset of samples to analyze.
User_mutants = ['']
# User can specify a subset of peptide ranges to analyze.
User_peptides = ['']

# Get the metadata dataframe 'metadf' from Metadf_File.
Read_Spectra_List = True
# This file should be located in the Data_DIR.
Metadf_File = "hdxms_testsets_metadf.csv"

# Use a predetermined number of populations to fit curves; overrides
# Min/Max_Pops if given, and requires Preset_Pops_File.
Preset_Pops = False
# The Preset_Pops_File is like the output 'datafits' file (a row for every
# spectrum: sample/peptide/charge/timepoint/rep) with additional columns
# called 'min_pops' and 'max_pops' that specify the integer number of curves
# to fit.
Preset_Pops_File = os.path.join(Data_DIR, "test_configdf_26feb2024.csv")

# Create output plots and save them as pdf.
Generate_Plots = True
# True is recommended when processing lots of data: plots won't be displayed
# in the notebook during the run.
Hide_Figure_Output = False
# Also save figures as an svg file. Slow, but better for making figures;
# recommend only enabling for a subset of data of interest.
SVG = False

# Try X unique fits to determine best_n_curves. Not used if Use_DiffEvo = True.
BestFit_of_X = 3
# Once the best number of curves to fit is determined, an additional Nboot
# fits will be performed. Nboot is the number of individual fits to perform,
# using n_best_curves from the initial round of fits.
Nboot = 20
# If the noise value is known, specify it here instead of estimating it as
# Y_ERR % of the avg Un+TD peaks.
setNoise = None
# Percent random error applied during boot as y+np.random.normal(0,yerr);
# 0.0 for NoNoise, use ~0.5% for noise added.
# The absolute Noise value is then Y_ERR * avg(maxInt of Un and TD); this is
# a very rough way to give a consistent Noise value throughout a dataset.
Y_ERR = 1.0
# Fraction deuteration in the D2O exchange buffer; represents the theoretical
# max deuteration fraction (w/o backexchange).
Dfrac = 0.90
# Number of N-term residues to remove from the possible exchanging NH's
# (usually 1 or 2). This will mostly affect the corrected Dabs value, as it
# is scaled to make TD = theoretical Nex.
Nterm_subtract = 2
# Number of zero points to have on the tail of the picked data.
Zero_Filling = 3
# ppm; sensitivity of the peak picker to expected m/z centers.
# LeuEnk wants higher, pep122 wants lower ...
Peak_Resolution = 70.0
# Fit n=1 binomial for UN/TD to calculate d_corr and back exchange values.
Binomial_dCorr = True
# Find the envelope width at Env_threshold * Intensity_max.
Env_threshold = 0.1
# Only fit up to n = int(z*env/3*Env_limit - 2/3); a very rough way to set
# the high_pop per spectrum.
Limit_by_envelope = False
# Used if Limit_by_envelope = True; rough measure to constrain n_curves fit
# according to data width & number of fit params.
Env_limit = 0.95
# Min_Pops to Max_Pops sets the range of populations to fit; set both to the
# same value to force a single population.
Min_Pops = 1
# Maximum number of underlying populations to fit.
Max_Pops = 3
# Multiplier of how much to let Nex exceed the number of predicted
# exchangeable backbone NHs.
Nex_Max_Scale = 1.2
# Fall back to n-1 curves if a population is below this. Does not apply to
# bootstrap fits, but does exclude from boot average values.
Pop_Thresh = 0.03
# Stringency for accepting more fit populations; higher permits more
# populations. Reasonable values are 0.01 to 0.05. Set to a large value
# (e.g. 1000) if you want to fit all data to each min to max population.
Ncurve_p_accept = 0.05
# Used for parameter initialization.
Random_Seed = 16
  
# If Scale_Y_Values = True, plots will be in original Intensity units.
# The fit will always be on normalized Intensity as it is much faster.
Scale_Y_Values = True
# peak_picking will retain the Raw spectrum if True; if False it will only
# keep peaks. Automatically True for Test_Data.
Keep_Raw = True
# Add a column to figures that is an overlay of all available replicates.
Overlay_replicates = True

########################################
'''end user input''';
########################################
