# Results Tables

* **Error Metrics**: RMSE, MAE, MAAO, MAPE, RMSLE
* **Goodness of Fit**: NLL (negative log-likelihood), PR (prediction region) coverage
* **Uncertainty**: PR area, confidence interval (and width)

In [41]:
# import modules
import numpy as np
import pickle
import scipy.stats as stats
import scipy.special
from scipy.stats import multivariate_normal
from spatial_plotting import rmse,mae,maao,mape,rmsle
from hypothesis_test_functions import generate_samples,generate_rmsle_samples,get_dists_and_data

In [2]:
# initialisation (indexing for model retrieval)
num_reps = 100
config_ids = [36,37,38,39] # configuration ids of the optimal hyperparameter models
replication_ids = {}
# Experiment IDs (values, not array positions) of runs that did not complete.
failed_experiment_IDs = []


def experiment_ids_for_config(index, num_reps, failed_ids=()):
    """Return the experiment IDs of the ``index``-th configuration.

    Configuration ``index`` (0-based position in ``config_ids``) owns the
    contiguous, 1-based ID range ``index*num_reps + 1 .. (index+1)*num_reps``.

    Parameters
    ----------
    index : int
        Position of the configuration in ``config_ids``.
    num_reps : int
        Number of replicates run per configuration.
    failed_ids : sequence of int, optional
        Experiment IDs to exclude. IDs outside this configuration's range
        are ignored.

    Returns
    -------
    numpy.ndarray
        The surviving experiment IDs, in ascending order.
    """
    experiment_ids = np.arange(index*num_reps+1, (index+1)*num_reps+1)
    # Filter by VALUE. np.delete would treat the failed IDs as positions and
    # silently drop the wrong experiments (IDs are 1-based and offset per config).
    return experiment_ids[~np.isin(experiment_ids, failed_ids)]


for index, config_id in enumerate(config_ids):
    replication_ids[config_id] = experiment_ids_for_config(index, num_reps, failed_experiment_IDs)

In [3]:
# Where the HPC run outputs live and how the per-experiment files are named.
# NOTE(review): these are absolute paths on one specific machine; anyone else
# running the notebook has to edit `hpc_run_dir`.
hpc_run_dir = r"C:\Users\vm2218\OneDrive - Imperial College London\PhD Project\seaducks\experiments\hpc_runs\04-02-2025"
root_dir = hpc_run_dir + r"\model_test_data"    # passed to the sample / distribution loaders
root_dir_model = hpc_run_dir + r"\fit_models"   # pickled fitted models are read from here

# Pieces of the per-experiment file name: <prefix><experiment id><suffix>
date = '2025-02-04'
early_stopping = 100
file_name_prefix = "long_experiment_"
file_name_suffix = "_date_" + date + "_early_stopping_" + str(early_stopping)

In [4]:
# Configuration ID of each model variant, listed in ascending ID order.
# The variable name encodes the variant: SST on/off, polar on/off.
SST_false_polar_false_config_ID = 36
SST_false_polar_true_config_ID = 37
SST_true_polar_false_config_ID = 38
SST_true_polar_true_config_ID = 39

# Forwarded as `invalid_vals=` to the sample / distribution loaders.
invalid_vals = list()

## Error Metrics

In [None]:
# Error metrics to report, keyed by the label used in the printed table.
metrics = {'RMSE':rmse,'MAE':mae,'MAAO':maao,'MAPE':mape, 'RMSLE':rmsle}

# Display names of the four model variants, in the same order as `samples` below.
SST_true_polar_false_name = 'SST: True | Polar: False'
SST_false_polar_false_name = 'SST: False | Polar: False'
SST_true_polar_true_name = 'SST: True | Polar: True'
SST_false_polar_true_name = 'SST: False | Polar: True'
names = [SST_true_polar_false_name,SST_false_polar_false_name,SST_true_polar_true_name,SST_false_polar_true_name]

# Positional arguments shared by every sampler call.
sampler_args = (num_reps, root_dir, file_name_prefix, replication_ids, file_name_suffix)

for metric_name, metric in metrics.items():

    print(f'\n .*+ --------------- {metric_name} --------------- +*.')

    # RMSLE has its own sampler; every other metric goes through generate_samples.
    sampler = generate_rmsle_samples if metric_name == 'RMSLE' else generate_samples

    # Each call returns the samples for a (SST: True, SST: False) pair of configurations.
    X_SST_true_polar_false, X_SST_false_polar_false = sampler(
        metric, SST_true_polar_false_config_ID, SST_false_polar_false_config_ID,
        *sampler_args, invalid_vals=invalid_vals)
    X_SST_true_polar_true, X_SST_false_polar_true = sampler(
        metric, SST_true_polar_true_config_ID, SST_false_polar_true_config_ID,
        *sampler_args, invalid_vals=invalid_vals)

    samples = [X_SST_true_polar_false, X_SST_false_polar_false,X_SST_true_polar_true, X_SST_false_polar_true]
    # Standard error of the mean of each sample.
    sems = [stats.sem(sample) for sample in samples]

    for ii, name in enumerate(names):
        sample_mean = np.mean(samples[ii])
        # Single quotes inside the f-string keep this valid on Python < 3.12.
        print(f"\n{name}\n{'_'*len(name)}")
        print(f"{metric_name}: {sample_mean:.3f} +- {sems[ii]:.3f}")
        # Student-t interval for the mean, with len(sample)-1 degrees of freedom.
        t_interval = stats.t.interval(confidence=0.95, df=len(samples[ii])-1,
              loc=sample_mean,
              scale=sems[ii])
        # float() keeps the list printing as plain numbers; NumPy >= 2 would
        # otherwise show each bound as `np.float64(...)`.
        print(f"95 percent confidence interval for the mean of {metric_name}: {[round(float(val),2) for val in t_interval]}")
        print(f"    width: {t_interval[1]-t_interval[0]:.3f}")



 .*+ --------------- RMSE --------------- +*.

SST: True | Polar: False
________________________
RMSE: 14.543 +- 0.045
95 percent confidence interval for the mean of RMSE: [14.45, 14.63]
    width: 0.180

SST: False | Polar: False
_________________________
RMSE: 14.784 +- 0.046
95 percent confidence interval for the mean of RMSE: [14.69, 14.88]
    width: 0.182

SST: True | Polar: True
_______________________
RMSE: 14.534 +- 0.046
95 percent confidence interval for the mean of RMSE: [14.44, 14.62]
    width: 0.181

SST: False | Polar: True
________________________
RMSE: 14.750 +- 0.046
95 percent confidence interval for the mean of RMSE: [14.66, 14.84]
    width: 0.183

 .*+ --------------- MAE --------------- +*.

SST: True | Polar: False
________________________
MAE: 9.844 +- 0.030
95 percent confidence interval for the mean of MAE: [9.78, 9.9]
    width: 0.119

SST: False | Polar: False
_________________________
MAE: 9.964 +- 0.030
95 percent confidence interval for the mean of MAE

## Goodness of Fit

In [6]:
# Positional arguments shared by both get_dists_and_data calls.
dist_loader_args = (num_reps, root_dir, root_dir_model, file_name_prefix, replication_ids, file_name_suffix)

# Each call takes a (SST: True, SST: False) pair of configuration IDs and returns,
# for each of the two, a `dists` object and a `y` object.
# NOTE(review): their exact layout is defined in hypothesis_test_functions; later
# cells index them as dists[0][0] / dists[0][1] and y[0].
(dists_SST_true_polar_false, y_SST_true_polar_false,
 dists_SST_false_polar_false, y_SST_false_polar_false) = get_dists_and_data(
    SST_true_polar_false_config_ID, SST_false_polar_false_config_ID,
    *dist_loader_args, invalid_vals=invalid_vals)

(dists_SST_true_polar_true, y_SST_true_polar_true,
 dists_SST_false_polar_true, y_SST_false_polar_true) = get_dists_and_data(
    SST_true_polar_true_config_ID, SST_false_polar_true_config_ID,
    *dist_loader_args, invalid_vals=invalid_vals)

In [7]:
def build_scipy_dists(dist_params):
    """Wrap the first entry of ``dist_params`` as frozen scipy distributions.

    ``dist_params[0][0]`` is indexed as the per-observation means and
    ``dist_params[0][1]`` as the matching covariances; one
    ``multivariate_normal`` is built per row of the means.
    """
    means = dist_params[0][0]
    covariances = dist_params[0][1]
    return [multivariate_normal(mean=means[row], cov=covariances[row]) for row in range(means.shape[0])]


# One list of frozen predictive distributions per model variant.
scipy_dists_SST_true_polar_false = build_scipy_dists(dists_SST_true_polar_false)
scipy_dists_SST_false_polar_false = build_scipy_dists(dists_SST_false_polar_false)
scipy_dists_SST_true_polar_true = build_scipy_dists(dists_SST_true_polar_true)
scipy_dists_SST_false_polar_true = build_scipy_dists(dists_SST_false_polar_true)

In [36]:
def NLL(dists, y):
    """Per-observation negative log-likelihood.

    Parameters
    ----------
    dists : sequence
        One distribution object per observation, each exposing ``logpdf``.
    y : sequence
        Only ``y[0]`` is used: a 2-D array whose row ``i`` is scored under
        ``dists[i]``.

    Returns
    -------
    list
        ``-dists[i].logpdf(y[0][i, :])`` for every row ``i`` of ``y[0]``.
    """
    observations = y[0]
    nlls = []
    for row in range(observations.shape[0]):
        nlls.append(-dists[row].logpdf(observations[row, :]))
    return nlls

def in_ci(error, sigma, alpha=0.95):
    """Check whether ``error`` lies inside the ``alpha``-level Gaussian region.

    The squared Mahalanobis distance ``error^T sigma^{-1} error`` is compared
    with the ``alpha`` quantile of a chi-squared distribution whose degrees of
    freedom equal the dimension of ``sigma``.

    Parameters
    ----------
    error : numpy.ndarray
        Difference between an observation and the predicted mean.
    sigma : numpy.ndarray
        Square covariance matrix of the prediction.
    alpha : float, optional
        Probability level of the region (default 0.95).

    Returns
    -------
    bool-like
        True when the squared distance is strictly below the critical value.
    """
    crit_val = stats.chi2.ppf(alpha, sigma.shape[0])
    # Solve sigma @ x = error rather than forming sigma^{-1} explicitly:
    # same quantity, cheaper and numerically better conditioned.
    n = error.T @ np.linalg.solve(sigma, error)
    return n < crit_val

def coverage(dists, y, alpha=0.90):
    """Fraction of observations that fall inside their ``alpha``-level region.

    Parameters
    ----------
    dists : sequence
        One distribution per observation, each exposing ``mean`` and ``cov``.
    y : sequence
        Only ``y[0]`` is used: a 2-D array whose row ``i`` is compared with
        ``dists[i]``.
    alpha : float, optional
        Probability level forwarded to ``in_ci`` (default 0.90).

    Returns
    -------
    float
        Number of rows for which ``in_ci`` is true, divided by the number of rows.
    """
    observations = y[0]
    hits = []
    for row in range(observations.shape[0]):
        dist = dists[row]
        residual = observations[row, :] - dist.mean
        hits.append(in_ci(residual, dist.cov, alpha=alpha))
    return sum(hits)/(len(hits))

def matrix_area(mat, mult):
    """Volume of the ellipsoid ``{x : x^T mat^{-1} x <= mult}``.

    With ``p = mat.shape[0]`` the result is
    ``2 * pi**(p/2) / (p * Gamma(p/2)) * sqrt(det(mat)) * mult**(p/2)``,
    which for ``p == 2`` is the ellipse area ``pi * sqrt(det(mat)) * mult``.

    Parameters
    ----------
    mat : numpy.ndarray
        Square matrix defining the ellipsoid's shape.
    mult : float
        Squared radius of the ellipsoid in the metric of ``mat``.

    Returns
    -------
    float
        The ellipsoid's volume (its area when ``p == 2``).
    """
    p = mat.shape[0]
    half_p = p / 2
    # Volume of the unit ball in p dimensions.
    unit_ball_volume = 2 * (np.pi ** half_p) / scipy.special.gamma(half_p) / p
    # sqrt(det) is the product of the ellipsoid's semi-axis scalings.
    axis_scale = np.linalg.det(mat) ** 0.5
    return unit_ball_volume * axis_scale * mult ** half_p

def area(dists, y, alpha: float = 0.90):
    """Area of each observation's ``alpha``-level region, scaled by 100**2.

    Parameters
    ----------
    dists : sequence
        One distribution per observation, each exposing ``cov``.
    y : sequence
        Only ``y[0].shape[0]`` is used, to know how many distributions to visit.
    alpha : float, optional
        Probability level of the region (default 0.90).

    Returns
    -------
    list
        One scaled area per observation.
    """
    n_obs = y[0].shape[0]
    region_areas = []
    for row in range(n_obs):
        cov = dists[row].cov
        # TODO (from the original author): check that using the chi-squared
        # quantile as the multiplier is correct for the area.
        radius_sq = stats.chi2.ppf(alpha, cov.shape[1])
        # NOTE(review): the 100**2 factor looks like a squared unit conversion - confirm.
        region_areas.append(matrix_area(cov, radius_sq)*100**2)
    return region_areas

In [38]:
# Goodness-of-fit / uncertainty metrics, keyed by the label used in the printed table.
goodness_of_fit_metrics = {'NLL':NLL, 'PR area':area, 'PR coverage':coverage}

# Display names of the four model variants, in the same order as `model_inputs`.
SST_true_polar_false_name = 'SST: True | Polar: False'
SST_false_polar_false_name = 'SST: False | Polar: False'
SST_true_polar_true_name = 'SST: True | Polar: True'
SST_false_polar_true_name = 'SST: False | Polar: True'
names = [SST_true_polar_false_name,SST_false_polar_false_name,SST_true_polar_true_name,SST_false_polar_true_name]

# (predictive distributions, observations) for each model variant.
model_inputs = [
    (scipy_dists_SST_true_polar_false, y_SST_true_polar_false),
    (scipy_dists_SST_false_polar_false, y_SST_false_polar_false),
    (scipy_dists_SST_true_polar_true, y_SST_true_polar_true),
    (scipy_dists_SST_false_polar_true, y_SST_false_polar_true),
]

for metric_name, metric in goodness_of_fit_metrics.items():

    print(f'\n .*+ --------------- {metric_name} --------------- +*.')

    # Results go into `samples` only. The `dists_*` variables produced by the
    # loading cell are left untouched so the earlier cells can be re-run.
    samples = [metric(model_dists, model_y) for model_dists, model_y in model_inputs]

    # Coverage is a single proportion per model, so it has no standard error.
    # None (rather than a stale list from the previous metric) makes misuse fail loudly.
    if metric_name != 'PR coverage':
        sems = [stats.sem(sample) for sample in samples]
    else:
        sems = None

    for ii, name in enumerate(names):
        # Single quotes inside the f-string keep this valid on Python < 3.12.
        print(f"\n{name}\n{'_'*len(name)}")
        if metric_name == 'PR area':
            # Areas are large numbers, so they are reported as whole numbers.
            print(f"{metric_name}: {round(np.mean(samples[ii]))} +- {round(sems[ii])}")
        elif metric_name == 'PR coverage':
            print(f"{metric_name}: {np.mean(samples[ii]):.3f}")
        else:
            print(f"{metric_name}: {np.mean(samples[ii]):.3f} +- {sems[ii]:.3f}")




 .*+ --------------- NLL --------------- +*.

SST: True | Polar: False
________________________
NLL: -1.465 +- 0.009

SST: False | Polar: False
_________________________
NLL: -1.441 +- 0.010

SST: True | Polar: True
_______________________
NLL: -1.456 +- 0.011

SST: False | Polar: True
________________________
NLL: -1.426 +- 0.010

 .*+ --------------- PR area --------------- +*.

SST: True | Polar: False
________________________
PR area: 2252 +- 11

SST: False | Polar: False
_________________________
PR area: 2396 +- 13

SST: True | Polar: True
_______________________
PR area: 2262 +- 12

SST: False | Polar: True
________________________
PR area: 2390 +- 13

 .*+ --------------- PR coverage --------------- +*.

SST: True | Polar: False
________________________
PR coverage: 0.862

SST: False | Polar: False
_________________________
PR coverage: 0.863

SST: True | Polar: True
_______________________
PR coverage: 0.860

SST: False | Polar: True
________________________
PR coverage: 0.86

## Feature Importances

In [42]:
# load models
# One fitted model is loaded per configuration. The replicate is chosen
# explicitly instead of through a loop variable leaked from another cell, so the
# result no longer depends on which cells ran before this one.
# 3 is the value that leaked variable holds when the notebook is run top to
# bottom; change it to inspect a different replicate.
model_replicate_index = 3

models = {}
for config_ID in config_ids:
    experiment_id = replication_ids[config_ID][model_replicate_index]
    model_path = fr'{root_dir_model}/{file_name_prefix}{experiment_id}{file_name_suffix}.p'
    # NOTE: pickle.load executes arbitrary code from the file; only load model
    # files produced by your own runs.
    with open(model_path, 'rb') as pickle_file:
        models[config_ID] = pickle.load(pickle_file)
                

In [59]:
SST_true_polar_false_name = 'SST: True | Polar: False'
SST_false_polar_false_name = 'SST: False | Polar: False'
SST_true_polar_true_name = 'SST: True | Polar: True'
SST_false_polar_true_name = 'SST: False | Polar: True'

names = [SST_true_polar_false_name,SST_false_polar_false_name,SST_true_polar_true_name,SST_false_polar_true_name]
# Distribution parameters; feature_importances_ is indexed by parameter first.
params = ['mu_x', 'mu_y', 'a_11', 'a_12','a_22']
explanatory_var_labels = ['u_av','v_av','lat','lon','day_of_year','Wx','Wy','Tx','Ty','sst_x_derivative','sst_y_derivative']
# Configuration IDs in the same order as `names`.
config_ids = [SST_true_polar_false_config_ID,SST_false_polar_false_config_ID,SST_true_polar_true_config_ID,SST_false_polar_true_config_ID]

# The "SST: False" models are reported for every label except the two trailing
# SST-derivative ones.
n_vars_without_sst = 9

print('\n .*+ --------------- Feature Importance --------------- +*.')
for model_idx, name in enumerate(names):
    # Single quotes inside the f-string keep this valid on Python < 3.12.
    print(f"\n{name}\n{'_'*len(name)}")
    print("\nFeature Importances (Gini Index):")
    fi = models[config_ids[model_idx]].feature_importances_

    # Odd positions in `names` are the "SST: False" variants.
    if model_idx % 2 == 1:
        var_labels = explanatory_var_labels[:n_vars_without_sst]
    else:
        var_labels = explanatory_var_labels

    for param_idx, param in enumerate(params):
        print(f'\nFeature Importances for {param}')
        # Dedicated index names: the original reused the outer loop variable here.
        for var_idx, var in enumerate(var_labels):
            print(f"{var}: {fi[param_idx][var_idx]:.2f}")


 .*+ --------------- Feature Importance --------------- +*.

SST: True | Polar: False
________________________

Feature Importances (Gini Index):

Feature Importances for mu_x
u_av: 0.11
v_av: 0.07
lat: 0.27
lon: 0.24
day_of_year: 0.06
Wx: 0.03
Wy: 0.03
Tx: 0.03
Ty: 0.03
sst_x_derivative: 0.06
sst_y_derivative: 0.07

Feature Importances for mu_y
u_av: 0.07
v_av: 0.13
lat: 0.24
lon: 0.22
day_of_year: 0.07
Wx: 0.03
Wy: 0.03
Tx: 0.04
Ty: 0.03
sst_x_derivative: 0.08
sst_y_derivative: 0.06

Feature Importances for a_11
u_av: 0.10
v_av: 0.08
lat: 0.20
lon: 0.17
day_of_year: 0.10
Wx: 0.05
Wy: 0.05
Tx: 0.05
Ty: 0.05
sst_x_derivative: 0.07
sst_y_derivative: 0.07

Feature Importances for a_12
u_av: 0.09
v_av: 0.10
lat: 0.17
lon: 0.15
day_of_year: 0.08
Wx: 0.07
Wy: 0.05
Tx: 0.06
Ty: 0.06
sst_x_derivative: 0.08
sst_y_derivative: 0.09

Feature Importances for a_22
u_av: 0.09
v_av: 0.10
lat: 0.20
lon: 0.18
day_of_year: 0.10
Wx: 0.05
Wy: 0.05
Tx: 0.05
Ty: 0.04
sst_x_derivative: 0.07
sst_y_derivative