In [1]:
# import modules
import numpy as np
import pickle
import pandas as pd

In [2]:
# initialisation (indexing for model retrieval)
num_reps = 100
config_ids = [36,37,38,39] # configuration ids of the optimal hyperparameter models
replication_ids = {}
failed_experiment_IDs = []

for index, config_id in enumerate(config_ids):
    experimentIDs = np.arange(index*num_reps+1,(index+1)*num_reps+1) # corresponding experiment IDs 
    # removing experiments that didn't run successfully
    experimentIDs = np.delete(experimentIDs,failed_experiment_IDs)
    replication_ids[config_id] = experimentIDs

In [3]:
# load appropriate data and obtain samples

# initialisation
date = '2025-02-04'
early_stopping = 100
root_dir = r"C:\Users\vm2218\OneDrive - Imperial College London\PhD Project\seaducks\experiments\hpc_runs\04-02-2025\model_test_data"
root_dir_model = r"C:\Users\vm2218\OneDrive - Imperial College London\PhD Project\seaducks\experiments\hpc_runs\04-02-2025\fit_models"
file_name_prefix = "long_experiment_"
file_name_suffix = f"_date_{date}_early_stopping_{early_stopping}"

In [4]:
SST_true_polar_false_config_ID = 38
SST_false_polar_false_config_ID = 36
SST_true_polar_true_config_ID = 39
SST_false_polar_true_config_ID = 37
invalid_vals = []

In [5]:
return_variables = ['lon','lat','id','time','u','v','config_id','replication_id','mu_1','mu_2','sigma_11','sigma_22','sigma_12']
testing_data = pd.DataFrame(columns=return_variables)

In [6]:
for config_id in config_ids:
    for ii in range(num_reps):
        with open(fr'{root_dir}/{file_name_prefix}{replication_ids[config_id][ii]}{file_name_suffix}_test_data.p', 'rb') as pickle_file:
                    data = pickle.load(pickle_file)
                    data_df = data[0]
                    means = data[1][0]
                    covs = data[1][1]
                    # adding info
                    data_df['config_id'] = config_id
                    data_df['replication_id'] = ii
                    data_df['mu_1'], data_df['mu_2'] = means[:,0] , means[:,1] 
                    data_df['sigma_11'], data_df['sigma_22'], data_df['sigma_12'] = covs[:,0,0], covs[:,1,1], covs[:,0,1]
                    outvars = data_df[return_variables]
                    testing_data = pd.concat([testing_data,outvars],ignore_index=True)

  testing_data = pd.concat([testing_data,outvars],ignore_index=True)


In [7]:
# add residuals
testing_data['e_1'] = testing_data['u']-testing_data['mu_1']
testing_data['e_2'] = testing_data['v']-testing_data['mu_2']

In [None]:
# angle off-set
testing_data['angle_offset'] = np.einsum('ij,ij->i',testing_data[['u','v']].values,testing_data[['mu_1','mu_2']].values)
testing_data['angle_offset'] = np.divide(testing_data['angle_offset'], np.multiply(
    np.linalg.norm(testing_data[['u','v']].values,axis=1),np.linalg.norm(testing_data[['mu_1','mu_2']].values,axis=1)
    ))
testing_data['angle_offset'] = np.clip(testing_data['angle_offset'],-1,1)
testing_data['angle_offset'] = np.arccos(testing_data['angle_offset'])
testing_data['angle_offset']=np.rad2deg(testing_data['angle_offset'])

In [35]:
testing_data

Unnamed: 0,lon,lat,id,time,u,v,config_id,replication_id,mu_1,mu_2,sigma_11,sigma_22,sigma_12,e_1,e_2,angle_offset
0,-40.074001,17.128000,5931,2004-01-23,-0.222312,-0.010590,36,0,-0.169313,-0.054741,0.005193,0.005639,0.000742,-0.052999,0.044150,15.189117
1,-40.223999,17.080999,5931,2004-01-24,-0.198919,-0.033666,36,0,-0.130805,-0.083413,0.005149,0.006301,0.000057,-0.068115,0.049746,22.919156
2,-40.373001,17.089001,5931,2004-01-25,-0.181937,-0.021531,36,0,-0.118064,-0.106737,0.006138,0.006287,0.000304,-0.063873,0.085206,35.366315
3,-40.745998,17.059999,5931,2004-01-28,-0.129302,0.035954,36,0,-0.122502,-0.026347,0.006224,0.006374,-0.000773,-0.006800,0.062300,27.676869
4,-40.841000,17.106001,5931,2004-01-29,-0.105011,0.073022,36,0,-0.138038,-0.049084,0.006305,0.006309,0.000222,0.033026,0.122106,54.388244
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16479735,-76.292000,29.379999,66710170,2019-03-21,-0.046842,0.356167,39,99,-0.084840,0.331551,0.028093,0.033678,-0.002380,0.037998,0.024616,6.861031
16479736,-76.293999,29.694000,66710170,2019-03-22,0.114705,0.415374,39,99,0.017519,0.314532,0.024882,0.028807,-0.002453,0.097185,0.100842,12.249326
16479737,-76.115997,29.992001,66710170,2019-03-23,0.224617,0.243579,39,99,0.071991,0.212304,0.024482,0.027246,-0.000168,0.152626,0.031275,23.949320
16479738,-75.927002,30.091000,66710170,2019-03-24,0.234772,0.025205,39,99,0.022109,0.126709,0.023121,0.025292,0.000391,0.212664,-0.101504,73.974584


In [15]:
# add absolute angle offset
testing_data['angle_offset'] = testing_data.apply(lambda row:                                                    
                                                        np.dot(
                                                            row[['u','v']].values[0], row[['mu_1','mu_2']].values[0]),axis=1)

KeyboardInterrupt: 

In [None]:
# add absolute angle offset
testing_data['angle_offset'] = testing_data.apply(lambda row: 
                                                  np.rad2deg(
                                                      np.arccos(
                                                      np.clip(                                                      
                                                        np.dot(
                                                            row[['u','v']].values[0], row[['mu_1','mu_2']].values[0])/
                                                            np.linalg.norm(row[['u','v']].values[0])*np.linalg.norm(row[['mu_1','mu_2']].values[0]),
                                                        -1,1)
                                                    )
                                                    ),axis=1)

In [8]:
row = testing_data.head(1)