In [1]:
import pandas as pd
from matplotlib import pyplot as plt

folder = 'H:\\My Drive\\PROJECTS\\PSI 2022-2025\\XRF fundamentals vs. MVA'

In [8]:
summary = pd.read_csv(f'{folder}\\data\\dataset_summary.csv')
df = pd.read_csv(f'{folder}\\models\\cumulative_modelling_train_test_results.csv')
# drop PCR-py, I shouldn't have run it
test_df = df[df['model type']!='PCR-py'].copy().reset_index(drop=True)

# get %RMSEP from median concentration
test_df = test_df.merge(summary[['element','median']], how='left', left_on='variable', right_on='element').drop(columns='element')
for var in ['RMSE-P','RMSE-CV','RMSE-C']:
    test_df['%'+var] = (test_df[var] / test_df['median'])*100
test_df.drop(columns='median', inplace=True)

# set R2 < 0 to 0
for var in ['R2 test','R2 train']:
    test_df[var] = [0 if x < 0 else x for x in test_df[var]]
    
# summarize
summary_df = test_df.groupby(['filter','model type'], as_index=False)['%RMSE-P'].mean().rename(columns={'%RMSE-P':'mean_%RMSE-P'})
summary_df['stdev_%RMSE-P'] = list(test_df.groupby(['filter','model type'])['%RMSE-P'].std())

# sort based on my choice
summary_df['model_cat'] = pd.Categorical(
    summary_df['model type'], 
    categories=['OLS','PLS','LASSO','Ridge','ElasticNet','OMP','PCR-lin'], 
    ordered=True
)
summary_df = summary_df.sort_values('model_cat', ignore_index=True)
summary_df.to_csv(f'{folder}\\models\\summarized_results.csv', index=False)

In [11]:
cs=['#dead08','#1b6f78','#78241b']

fig,ax = plt.subplots(figsize=(10,6))

i=0
for f in summary_df['filter'].unique():
    temp = summary_df[summary_df['filter']==f].copy()

    ax.plot(temp['model type'], temp['mean_%RMSE-P'], color=cs[i], label=f)
    ax.errorbar(temp['model type'], temp['mean_%RMSE-P'], capsize=2, alpha=0.5, color=cs[i], yerr=temp['stdev_%RMSE-P'])
    i+=1
    
ax.semilogy()
plt.legend()
plt.savefig(f'{folder}\\figures\\model_comparison.eps', dpi=600)
plt.close()

The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript back

In [13]:
# zoom in plot
zoom = ['PLS','LASSO','Ridge','ElasticNet']

fig,ax = plt.subplots(figsize=(6,4))

i=0
for f in summary_df['filter'].unique():
    temp = summary_df[(summary_df['filter']==f)&(summary_df['model type'].isin(zoom))].copy()

    ax.plot(temp['model type'], temp['mean_%RMSE-P'], color=cs[i], label=f)
    ax.errorbar(temp['model type'], temp['mean_%RMSE-P'], capsize=2, alpha=0.5, color=cs[i], yerr=temp['stdev_%RMSE-P'])
    i+=1
    
ax.semilogy()

plt.savefig(f'{folder}\\figures\\model_comparison_zoom.eps', dpi=600)
plt.close()

The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript back