In [2]:
import numpy as np
import scipy
from scipy.io import loadmat, savemat
from sklearn.linear_model import LinearRegression
import os
from sklearn.utils import resample

In [4]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from scipy.stats import spearmanr
from scipy.stats import pearsonr

def bootstrap_reg(model,x_train,y_train,x_test,y_test, times = 100):
    '''
    Perform bootstrapping, store results for further analysis and visualization.
    In every round the model is refit on a resample (with replacement) of the
    training rows and scored on the unchanged test set.
    :param model: estimator exposing fit(X, y) and predict(X); it is refit in place every round
    :param x_train: training set X (rows are samples)
    :param y_train: training set Y, indexable by the same row indices as x_train
    :param x_test: testing set X
    :param y_test: testing set Y
    :param times: how many times to bootstrap, must be >= 1
    :return: dictionary keyed 'mae_result', 'rmse_result', 'r2_result', 'pearson_result',
             'spearman_result'; each value is
             {'result': <array of length times>, 'mean': <mean>, 'CI': [<low>, <high>]}
    :raises ValueError: if times is smaller than 1
    '''
    if times < 1:
        raise ValueError('times must be a positive integer, got %r' % (times,))

    # One list of per-round scores per metric; insertion order fixes the
    # order of the keys in the returned dictionary.
    scores = {'mae': [], 'rmse': [], 'r2': [], 'pearson': [], 'spearman': []}
    index = np.arange(x_train.shape[0])
    # The test targets are the same in every round, so flatten them once.
    y_true = y_test.reshape(-1,)
    for i in range(times):
        # A distinct but fixed seed per round keeps the whole run reproducible.
        boot_index = resample(index, replace=True, n_samples=None, random_state=9001+i)
        model.fit(x_train[boot_index], y_train[boot_index])
        y_preds = model.predict(x_test).reshape(-1,)

        # Only the correlation coefficients are kept; the p-values are discarded.
        p_cor, _ = pearsonr(y_true, y_preds)
        sp_cor, _ = spearmanr(y_true, y_preds)
        scores['mae'].append(mean_absolute_error(y_true, y_preds))
        scores['rmse'].append(np.sqrt(mean_squared_error(y_true, y_preds)))
        scores['r2'].append(r2_score(y_true, y_preds))
        scores['pearson'].append(p_cor)
        scores['spearman'].append(sp_cor)

    return {name + '_result': _summarize_metric(values, times)
            for name, values in scores.items()}


def _summarize_metric(values, times):
    '''
    Summarize the bootstrap distribution of one metric.
    :param values: list of metric values, one per bootstrap round
    :param times: number of bootstrap rounds (the length of values)
    :return: {'result': <array of values>, 'mean': <mean>, 'CI': [<low>, <high>]}
    '''
    arr = np.array(values)
    arr_sorted = np.sort(arr, axis=0)
    # round(0.975 * times) equals `times` for small round counts (1, 10, 20, ...),
    # which is one past the last element; clamp both indices to the valid range.
    last = times - 1
    ci_low = arr_sorted[min(round(0.025 * times), last)]
    ci_high = arr_sorted[min(round(0.975 * times), last)]
    return {'result': arr, 'mean': np.mean(arr, axis=0), 'CI': [ci_low, ci_high]}

def _returnRow(list):
    '''
    Build one table row from a sequence of cells.
    :param list: iterable of cell values; each one is converted with str()
    :return: the cells joined by ';'
    '''
    return ';'.join(map(str, list))

# Print results
def fillTable(boot_result, digit = 4):
    '''
    Print one ';'-separated table row for a bootstrap result.
    For each metric, in the order MAE, RMSE, R2, Pearson, Spearman, the row
    holds the rounded mean followed by the rounded interval as '[low,high]'.
    :param boot_result: dictionary with one entry per '<metric>_result' key, each
                        providing a 'mean' value and a 'CI' pair
    :param digit: number of decimals passed to round()
    :return: None, the row is written to stdout
    '''
    cells = []
    for key in ('mae_result', 'rmse_result', 'r2_result', 'pearson_result', 'spearman_result'):
        summary = boot_result[key]
        bounds = summary['CI']
        cells.append(str(round(summary['mean'], digit)))
        cells.append('[' + str(round(bounds[0], digit)) + ',' + str(round(bounds[1], digit)) + ']')
    print(_returnRow(cells))

def saveresult(boot_result,file):
    '''
    Store the raw per-round values of every metric as .npy files.
    :param boot_result: dictionary with one entry per '<metric>_result' key, each
                        holding the values to store under 'result'
    :param file: path prefix; 'mae.npy', 'rmse.npy', 'r2.npy', 'pearson.npy' and
                 'spearman.npy' are appended to it directly (no separator is added)
    '''
    targets = (
        ('mae.npy', 'mae_result'),
        ('rmse.npy', 'rmse_result'),
        ('r2.npy', 'r2_result'),
        ('pearson.npy', 'pearson_result'),
        ('spearman.npy', 'spearman_result'),
    )
    for suffix, key in targets:
        np.save(file + suffix, boot_result[key]['result'])

In [6]:
#Dataset N -> Dataset 1, bootstrapping; MPS95
# Fit on datasets 2, 3, NHTSA and NASCAR, evaluate on dataset 1, one
# single-metric linear regression per entry of `metrics`.
# NOTE: the working directory is changed with a relative path, so re-running
# this cell in the same kernel will look for a 'Data' folder inside 'Data'.
os.chdir('Data')
#Load Y
dataset1_MPS95 = np.load('dataset1_MPS95.npy')
dataset2_MPS95 = np.load('dataset2_MPS95.npy')
dataset3_MPS95 = np.load('dataset3_MPS95.npy')
NHTSA_MPS95 = np.load('NHTSA_MPS95.npy')
NASCAR_MPS95 = np.load('NASCAR_MPS95.npy')
# The targets do not depend on the metric, so they are stacked once.
y_train = np.vstack((dataset2_MPS95,dataset3_MPS95,NHTSA_MPS95,NASCAR_MPS95))
y_test = dataset1_MPS95
#Load X
# Every .mat file is read once here rather than once per metric.
lab1_mat = loadmat('Lab impact1_BIC.mat')
lab2_mat = loadmat('Lab impact2_BIC.mat')
lab3_mat = loadmat('Lab impact3_BIC.mat')
cf1_mat = loadmat('CF1_BIC.mat')
cf2_mat = loadmat('CF2_BIC.mat')
mma1_mat = loadmat('MMA1_BIC.mat')
mma2_mat = loadmat('MMA2_BIC.mat')
# These two files keep the metrics as fields of a 'BIC' entry.
NHTSA_BIC = loadmat('NHTSA_BIC.mat')['BIC']
NASCAR_BIC = loadmat('NASCAR_BIC.mat')['BIC']

metrics  = ['BAM','BrIC','CP','GAMBIT','HIC','HIP','PRHIC','RIC','SI','PCS','lin_acc_CG_max','ang_vel_max','ang_acc_max','Damage_C','RVCI','KLC','BRIC','CIBIC']
for metric in metrics:
    HM1_metric = lab1_mat[metric]
    HM2_metric = lab2_mat[metric]
    NFL53_metric = lab3_mat[metric]
    dataset1_metric = np.vstack((HM1_metric,HM2_metric, NFL53_metric))

    AF_metric = cf1_mat[metric]
    PAC12_metric = cf2_mat[metric]
    dataset2_metric = np.vstack((AF_metric, PAC12_metric))

    MMA1_metric = mma1_mat[metric]
    MMA2_metric = mma2_mat[metric]
    dataset3_metric = np.vstack((MMA1_metric, MMA2_metric))

    NHTSA_metric = NHTSA_BIC[metric].astype(np.double)
    NASCAR_metric = NASCAR_BIC[metric].astype(np.double)

    x_train = np.vstack((dataset2_metric,dataset3_metric,NHTSA_metric,NASCAR_metric))
    x_test = dataset1_metric

    # One printed row per metric: mean and CI of MAE, RMSE, R2, Pearson, Spearman.
    lr = LinearRegression()
    boot_result = bootstrap_reg(lr,x_train,y_train,x_test,y_test, times = 100)
    fillTable(boot_result,digit = 4)



0.0601;[0.0556,0.064];0.08;[0.0743,0.0849];0.2776;[0.1874,0.3823];0.9421;[0.9421,0.9421];0.9424;[0.9424,0.9424]
0.0244;[0.023,0.0275];0.0325;[0.0304,0.0376];0.8804;[0.8416,0.8958];0.9489;[0.9489,0.9489];0.9516;[0.9516,0.9516]
0.0766;[0.0741,0.0787];0.0944;[0.0909,0.0975];-0.0051;[-0.0714,0.077];0.7356;[0.7356,0.7356];0.9178;[0.9178,0.9178]
0.0585;[0.0542,0.0643];0.0826;[0.0775,0.0891];0.2294;[0.1211,0.3273];0.8429;[0.8429,0.8429];0.8202;[0.8202,0.8202]
0.0803;[0.0736,0.0852];0.1025;[0.0959,0.1081];-0.1873;[-0.2963,-0.0161];0.8014;[0.8014,0.8014];0.8102;[0.8102,0.8102]
0.0745;[0.0705,0.0793];0.0982;[0.094,0.1032];-0.0899;[-0.1817,0.004];0.8333;[0.8333,0.8333];0.835;[0.835,0.835]
0.0839;[0.0606,0.0877];0.1044;[0.0714,0.1103];-0.24;[-0.3669,0.4457];0.7846;[0.7846,0.7846];0.9441;[0.9441,0.9441]
0.0802;[0.0754,0.0838];0.0998;[0.0931,0.1052];-0.1243;[-0.2379,0.0313];0.8975;[0.8975,0.8975];0.976;[0.976,0.976]
0.0805;[0.0739,0.0853];0.1029;[0.0965,0.1083];-0.1958;[-0.3038,-0.0289];0.7953;[0.

In [None]:
###---Similar code for the other datasets---###