In [None]:
%load_ext autoreload
%autoreload 2

#Setting Up and Importing the Necessary Packages/Libraries
##Package for reading in Bruker OPUS type files
from brukeropusreader import read_file
import matplotlib.pyplot as plt
from scipy.interpolate import UnivariateSpline
import pathlib
import numpy as np
import pandas as pd
#Local Functions
from hydrogenase_processing.cut_range import cut_range_subtraction, cut_range_subtraction_multiple_wv
from hydrogenase_processing.second_deriv import second_deriv, first_deriv
from hydrogenase_processing.anchor_points import get_peaks, get_start_end_anchorpoints, get_all_anchor_points
from hydrogenase_processing.vaporfit import atm_subtraction
from hydrogenase_processing.vaporfit import AtmFitParams
#testing
#find peaks
from scipy.signal import find_peaks, peak_widths


In [None]:
#Locations of the local data, given relative to the notebook's working directory
##Input spectra (Bruker OPUS files): pD8 samples and the water vapor references
path_to_pD8_data = pathlib.Path("../../data/opus_files/pD8")
path_to_water_vapor_data = pathlib.Path("../../data/opus_files/water_vapor")
##Directory for output plots
path_to_output_plots_ = pathlib.Path("../../data/output_plots/")

In [None]:
#Pulling in all pD8 sample data
##Every entry of the pD8 directory, in sorted order (left unfiltered, exactly as before)
pD8_raw_files = sorted(path_to_pD8_data.iterdir())

#Initializing dict of raw spectra read from the file system
pD8_raw_data = dict()

#Populating the pD8_raw_data dict with all the read in raw opus files
##Keys are 'pD8_' + the FIRST 4 characters of the file name, i.e. the same shortening
##that is applied to the file_name column of the config sheet
for opus_path in pD8_raw_files:
    #iterdir() also yields sub-directories (and broken links); read_file can only handle real files.
    #The macOS Finder metadata file .DS_Store is not a spectrum either.
    if not opus_path.is_file() or opus_path.name.startswith('.DS_Store'):
        continue
    sample_key = f'pD8_{opus_path.name[0:4]}'
    #Two files sharing their first 4 characters map to one key; the later file still wins
    #(unchanged behaviour), but the overwrite is now reported instead of happening silently
    if sample_key in pD8_raw_data:
        print(f'Warning: {opus_path.name} reuses key {sample_key} and replaces the spectrum loaded earlier')
    pD8_raw_data[sample_key] = read_file(opus_path)

print(pD8_raw_data.keys())

In [None]:
#Pulling in all wv data
##Every entry of the water vapor directory, in sorted order (left unfiltered, exactly as before)
water_vapor_files = sorted(path_to_water_vapor_data.iterdir())

#Initializing dict of water vapor spectra read from the file system
water_vapor_data = dict()

#Populating the water_vapor_data dict with all the read in wv opus files
##Keys are 'wv_' + the LAST 6 characters of the file name + '_data', which keeps the names distinct
for wv_path in water_vapor_files:
    #iterdir() also yields sub-directories (and broken links); read_file can only handle real files.
    #The macOS Finder metadata file .DS_Store is not a spectrum either.
    if not wv_path.is_file() or wv_path.name.startswith('.DS_Store'):
        continue
    wv_key = f'wv_{wv_path.name[-6:]}_data'
    #Should two names ever end in the same 6 characters, the later file still wins
    #(unchanged behaviour), but the overwrite is now reported instead of happening silently
    if wv_key in water_vapor_data:
        print(f'Warning: {wv_path.name} reuses key {wv_key} and replaces the spectrum loaded earlier')
    water_vapor_data[wv_key] = read_file(wv_path)

print(water_vapor_data.keys())

In [None]:
#Pulling in the config sheet for the pD8 samples
##file_name is rewritten as 'pD8_' + its first 4 characters so that it matches the keys of pD8_raw_data
pd8_config_df = pd.read_excel(
    "../../data/prospecpy_config.xlsx", sheet_name="hyd2_pD_8"
).assign(
    file_name=lambda config: config["file_name"].apply(lambda file_name: f'pD8_{file_name[0:4]}')
)

#Using the shortened file names as row labels, so the config can be walked together with the pD8_raw_data dict below
indexed_pD8_config_df = pd8_config_df.set_index('file_name')
print(indexed_pD8_config_df)

In [None]:
#Initializing dict of post water vapor subtraction spectra
cut_range_sub_wv_data = dict()

#Walking the config rows: every sample that has a loaded raw spectrum is handed to
#cut_range_subtraction_multiple_wv together with all water vapor spectra and the
#range_start / range_end values of its config row
##SG_poly / SG_points are forwarded unchanged (presumably Savitzky-Golay settings - confirm in hydrogenase_processing.cut_range)
for idx, row in indexed_pD8_config_df.iterrows():
    print(idx)
    if idx in pD8_raw_data:
        raw_data_i = pD8_raw_data[idx]
        cut_range_sub_wv_data[f'{idx}_cut_range_sub_wv'] = cut_range_subtraction_multiple_wv(
            raw_data_i,
            water_vapor_data,
            row["range_start"],
            row["range_end"],
            SG_poly = 3,
            SG_points = 21,
        )
    else:
        #A config row without a loaded spectrum used to be dropped without any trace,
        #although its index had just been printed; make the gap visible instead
        print(f'  no raw spectrum loaded for {idx} - skipped')
    


In [None]:
#Second derivatives of the cut and water vapor subtracted spectra, keyed by '<source key>_second_deriv'
second_deriv_pD8_data = dict()

#One second_deriv call per entry of cut_range_sub_wv_data; the key is printed before the call to show progress
for i, cut_range_sub_wv_data_i in cut_range_sub_wv_data.items():
    print(i)
    second_deriv_pD8_data[f'{i}_second_deriv'] = second_deriv(cut_range_sub_wv_data_i)
