In [12]:
import pandas as pd
from LOD_paper_tools import *
from tqdm.notebook import tqdm

# Root folder prepended to every input path read below and to the exported CSVs.
# NOTE(review): absolute, machine-specific Windows path with a trailing separator;
# it must be edited before the notebook can run on another machine.
fp = "G:\\My Drive\\Darby Work\\Ytsma and Dyar 2021 (LOD paper)\\"

#### Compositions and reference keys

In [2]:
# Build the combined composition table from the LANL and MHC sheets.
comps_path = fp + "tables\\TableS1_sample_compositions.xlsx"
lanl_comps = pd.read_excel(comps_path, sheet_name="LANL")
mhc_comps = pd.read_excel(comps_path, sheet_name="MHC")

# Outer merge keeps samples that appear in only one of the two sheets.
merged_comps = mhc_comps.merge(lanl_comps, how="outer")
# Keep only the first whitespace-separated token of every column label.
merged_comps.columns = [label.split()[0] for label in merged_comps.columns]

comps = (
    merged_comps
    .drop_duplicates(subset='Sample')      # one row per sample
    .astype({'Sample': str})               # sample names as strings
    .sort_values(by='Sample')
    .replace(np.nan, "", regex=True)       # blanks are re-parsed just below
)

# Every column except the sample name is coerced back to numeric.
cols = comps.columns.drop('Sample')
comps[cols] = comps[cols].apply(pd.to_numeric)

# Lookup from spectrum key (pkey) to sample name.
key_path = fp + "ChemLIBS_spectrum_no_to_name.csv"
key_table = pd.read_csv(key_path)
mhc_key = key_table.set_index('pkey')['Sample'].to_dict()

## Calculate sensitivities
### Braga method

In [3]:
# Blank/noise regions used for the ChemLIBS and ChemCam spectra.
blank_braga = pd.read_csv(fp + "figures\\braga_noise_regions.csv")

# Load every spectra table, in the same order as before:
# ChemLIBS Earth, ChemLIBS Mars, ChemLIBS Vacuum, then ChemCam Mars.
cl_earth, cl_mars, cl_vacuum, cc_mars = (
    pd.read_csv(fp + spectra_file)
    for spectra_file in (
        "CL_all_Earth_spectra.csv",
        "CL_all_Mars_spectra.csv",
        "CL_all_Vacuum_spectra.csv",
        "CC_all_Mars_spectra.csv",
    )
)

In [4]:
def _outside_noise_regions(wave, regions):
    """Return a boolean Series flagging channels that fall outside the regions.

    A channel is flagged when its wavelength is strictly below the first
    region's ``start``, strictly above the last region's ``stop``, or strictly
    between one region's ``stop`` and the next region's ``start``. Channels
    that sit exactly on a boundary are therefore kept.

    Parameters
    ----------
    wave : pd.Series
        Wavelength of every row of a spectra table (its ``'wave'`` column).
    regions : pd.DataFrame
        Table with ``'start'`` and ``'stop'`` columns, one row per region,
        read in row order.

    Returns
    -------
    pd.Series
        Boolean mask aligned with ``wave``; True marks a row to remove.
    """
    starts = regions['start'].to_numpy()
    stops = regions['stop'].to_numpy()

    # before the first region or after the last one
    outside = (wave < starts[0]) | (wave > stops[-1])
    # in the gap between two consecutive regions
    for gap_low, gap_high in zip(stops[:-1], starts[1:]):
        outside |= (wave > gap_low) & (wave < gap_high)
    return outside


df_list = [cc_mars, cl_earth, cl_mars, cl_vacuum]

print("Datasets cleaned:")
for df in tqdm(df_list):
    # One vectorised mask per table replaces the former row-by-row drops,
    # which rebuilt the frame once for every removed row.
    outside = _outside_noise_regions(df['wave'], blank_braga)
    # Drop in place: later cells read cc_mars / cl_* through their original
    # names, so the existing objects must be the ones that get trimmed.
    df.drop(index=df.index[outside.to_numpy()], inplace=True)

Datasets cleaned:


  0%|          | 0/4 [00:00<?, ?it/s]

Rows cleaned:


  0%|          | 0/5485 [00:00<?, ?it/s]

Rows cleaned:


  0%|          | 0/5485 [00:00<?, ?it/s]

Rows cleaned:


  0%|          | 0/5485 [00:00<?, ?it/s]

Rows cleaned:


  0%|          | 0/5485 [00:00<?, ?it/s]

In [5]:
def _mean_row_std(spectra):
    """Average, over all rows, of the per-row standard deviation across the
    non-'wave' columns, rounded to 9 decimal places."""
    per_row_std = spectra.set_index('wave').std(axis=1)
    return round(per_row_std.mean(), 9)


# one sensitivity value per dataset
cc_mars_sens = _mean_row_std(cc_mars)
cl_mars_sens = _mean_row_std(cl_mars)
cl_earth_sens = _mean_row_std(cl_earth)
cl_vacuum_sens = _mean_row_std(cl_vacuum)

# parallel lists: entry i of each list describes the same dataset
sens_list = [cc_mars_sens, cl_mars_sens, cl_earth_sens, cl_vacuum_sens]
inst_list = ["LANL", 'ChemLIBS', 'ChemLIBS', 'ChemLIBS']
atm_list = ["Mars", "Mars", "Earth", "Vacuum"]

# assemble the Braga-method table
braga_sensitivities = pd.DataFrame(
    {
        "instrument": inst_list,
        "atmosphere": atm_list,
        "sensitivity": sens_list,
    }
)

### Metals method
#### LANL

In [6]:
# Load the LANL blank spectra workbooks and compute one sensitivity per sheet.
folder = fp + "LANL calculations\\metals background\\norm\\"
# NOTE(review): os.listdir returns entries in arbitrary order, so which nine
# entries are selected here depends on the filesystem — confirm this picks
# the intended workbooks.
spectra_list = os.listdir(folder)[:9]
spectra = {}
sheet_list = [1,2,3,4]

blank_list = []
spectrum_n = []
sens_list = []

for filename in tqdm(spectra_list):
    # workbook label = first two underscore-separated parts of the file name
    path = folder + "\\" + filename
    name_parts = filename.split("_")
    name = name_parts[0] + "_" + name_parts[1]
    workbook = pd.read_excel(path, sheet_name=sheet_list)
    spectra[name] = workbook

    for sheet in sheet_list:
        sheet_df = workbook[sheet]
        # the first three columns are leftovers from the hand calculation
        sheet_df.drop(columns=sheet_df.columns[[0, 1, 2]], inplace=True)

        # mean of the per-row standard deviation, rounded to 9 decimals
        sensitivity = round(sheet_df.std(axis=1).mean(), 9)

        # record one result row
        blank_list.append(name)
        spectrum_n.append(sheet)
        sens_list.append(sensitivity)

# table of every per-sheet result
lanl_sens_df = pd.DataFrame(
    {
        "blank": blank_list,
        "spectrum": spectrum_n,
        "sensitivity": sens_list,
    }
)

# overall LANL value for the metals method
lanl_metal_sens = round(lanl_sens_df['sensitivity'].mean(), 9)

  0%|          | 0/9 [00:00<?, ?it/s]

#### ChemLIBS

In [7]:
# Load the ChemLIBS metals-background workbook. sheet_name=None makes
# read_excel return every sheet, as a dict of DataFrames keyed by sheet name.
cl_data = pd.read_excel(fp + "ChemLIBS calculations\\background_metals_030421.xlsx", sheet_name = None)

In [8]:
# Compute one sensitivity per ChemLIBS sheet, skipping the first sheet
# (positions start at 1).
sheet_names = list(cl_data.keys())
sheet_list = np.arange(start = 1, stop = len(cl_data))

element_list = []
atm_list = []
sens_list = []

for sheet in sheet_list:
    # sheet names are read as "<element>_<atmosphere>..."
    name = sheet_names[sheet]
    name_parts = name.split("_")
    element = name_parts[0]
    atmosphere = name_parts[1]

    # Drop the first two columns and the first row WITHOUT mutating cl_data.
    # The previous in-place drops made this cell non-idempotent: re-running it
    # without reloading the workbook stripped two more columns and one more
    # row on every run, silently changing the sensitivities.
    sheet_df = cl_data[name]
    trimmed = (
        sheet_df
        .drop(columns=sheet_df.columns[[0, 1]])
        .drop(index=sheet_df.index[[0]])
    )

    # mean of the per-row standard deviation, rounded to 9 decimals
    sensitivity = round(trimmed.std(axis=1).mean(), 9)

    # record one result row
    element_list.append(element)
    atm_list.append(atmosphere)
    sens_list.append(sensitivity)

# table of every per-sheet result
mhc_sens_df = pd.DataFrame({
    "element" : element_list,
    "atmosphere" : atm_list,
    "sensitivity" : sens_list
})

# per-atmosphere averages for the MHC metals method
# (the vacuum sheets are matched on the label 'Vac')
mhc_mars_metal_sens = round(mhc_sens_df.loc[mhc_sens_df['atmosphere'] == 'Mars', 'sensitivity'].mean(), 9)
mhc_earth_metal_sens = round(mhc_sens_df.loc[mhc_sens_df['atmosphere'] == 'Earth', 'sensitivity'].mean(), 9)
mhc_vac_metal_sens = round(mhc_sens_df.loc[mhc_sens_df['atmosphere'] == 'Vac', 'sensitivity'].mean(), 9)

In [9]:
# Parallel lists: entry i of each list describes the same instrument/atmosphere.
sens_list = [
    lanl_metal_sens,
    mhc_mars_metal_sens,
    mhc_earth_metal_sens,
    mhc_vac_metal_sens,
]
inst_list = ["LANL"] + ['ChemLIBS'] * 3
atm_list = ["Mars", "Mars", "Earth", "Vacuum"]

# assemble the metals-method table
metals_sensitivities = pd.DataFrame(
    dict(instrument=inst_list, atmosphere=atm_list, sensitivity=sens_list)
)

In [10]:
# Tag each table with the method that produced it (added in place), then
# stack the metals rows above the Braga rows under a fresh 0..n-1 index.
for method_table, method_label in (
    (metals_sensitivities, "metals"),
    (braga_sensitivities, "braga"),
):
    method_table['method'] = method_label

sensitivities = pd.concat(
    [metals_sensitivities, braga_sensitivities],
    ignore_index=True,
)

In [None]:
# Each entry is [instrument, atmosphere].
envs = [['LANL', 'Mars'],['ChemLIBS', 'Mars'],['ChemLIBS', 'Earth'],['ChemLIBS', 'Vacuum']]

# Per-environment tables are collected here and concatenated once after the
# loop. This replaces the former `env == envs[0]` sentinel, which restarted
# the accumulated tables whenever an entry equal to the first one recurred,
# and re-concatenated the full table on every iteration.
avg_frames = []
detail_frames = []

for env in envs:
    instrument, atmosphere = env

    # results per model range
    # (get_results is not defined in this notebook — presumably provided by
    # LOD_paper_tools; its output is expected to carry 'element' and 'vector'
    # columns, verify against that module)
    results_0_750 = get_results(sensitivities, instrument, atmosphere, '0-750')
    results_250_1000 = get_results(sensitivities, instrument, atmosphere, '250-1000')

    # aggregate results: per-element mean and standard deviation
    detail_results = pd.concat([results_0_750, results_250_1000], ignore_index=True).drop(columns = 'vector')
    avg = detail_results.groupby('element', as_index=False).mean()
    stdev = detail_results.groupby('element', as_index=False).std()
    # suffix every stdev column except the leading 'element' key with '_sd'
    stdev.columns = ['element'] + [col + '_sd' for col in stdev.columns[1:]]
    avg_results = pd.merge(avg, stdev, how='outer', on='element')

    # add environment information right after the first column
    for frame in (detail_results, avg_results):
        frame.insert(loc=1, column='instrument', value=instrument)
        frame.insert(loc=2, column='atmosphere', value=atmosphere)

    avg_frames.append(avg_results)
    detail_frames.append(detail_results)

# full tables across all environments
full_avg_results = pd.concat(avg_frames, ignore_index=True)
full_detail_results = pd.concat(detail_frames, ignore_index=True)

### Export

In [None]:
# Write the averaged and the detailed result tables as CSV, without the index.
full_path = fp + "averaged_LOD_RMSEP_results.csv"
detail_path = fp + "detailed_LOD_RMSEP_results.csv"

for result_table, destination in (
    (full_avg_results, full_path),
    (full_detail_results, detail_path),
):
    result_table.to_csv(destination, index=False)