In [None]:
import numpy as np
import scipy
from scipy.io import loadmat, savemat
from sklearn.linear_model import LinearRegression
import os
from sklearn.utils import resample

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from scipy.stats import spearmanr
from scipy.stats import pearsonr

def _summarize_bootstrap(values, times):
    '''
    Summarize the bootstrap distribution of a single metric.
    :param values: list of per-iteration metric values, length = times
    :param times: number of bootstrap iterations that produced values
    :return: {'result': array of all values, 'mean': their mean, 'CI': [low, high]}
    '''
    arr = np.array(values)
    arr_sorted = np.sort(arr, axis=0)
    # The CI bounds are read from the sorted values at the 2.5% / 97.5% positions.
    # round(0.975 * times) equals `times` for small runs (e.g. times = 10), which
    # is one past the last element, so both positions are clamped to times - 1.
    last = times - 1
    low_idx = min(round(0.025 * times), last)
    high_idx = min(round(0.975 * times), last)
    return {'result': arr,
            'mean': np.mean(arr, axis=0),
            'CI': [arr_sorted[low_idx], arr_sorted[high_idx]]}


def bootstrap_reg(model,x_train,y_train,x_test,y_test, times = 100):
    '''
    Perform bootstrapping, store results for further analysis and visualization.
    The training set is resampled with replacement `times` times; each time the
    model is refit on the resample and scored on the (fixed) testing set.
    :param model: estimator exposing fit(X, y) and predict(X); it is refit in place,
                  so after the call it holds the fit from the last resample
    :param x_train: training set X (indexed along its first axis)
    :param y_train: training set Y
    :param x_test: testing set X
    :param y_test: testing set Y
    :param times: how many times to bootstrap, must be >= 1
    :return: dictionary with keys 'mae_result', 'rmse_result', 'r2_result',
             'pearson_result', 'spearman_result'; each value is
             {'result': <values, length = times>, 'mean': <mean>, 'CI': [<low>, <high>]}
    :raises ValueError: if times is smaller than 1
    '''
    if times < 1:
        raise ValueError('times must be at least 1, got ' + repr(times))

    scores = {'mae': [], 'rmse': [], 'r2': [], 'pearson': [], 'spearman': []}
    index = np.arange(x_train.shape[0])
    # The testing targets are the same in every iteration, so flatten them once.
    y_true = y_test.reshape(-1,)
    for i in range(times):
        # A different seed per iteration gives a different, but repeatable, resample.
        boot_index = resample(index, replace=True, n_samples=None, random_state=9001+i)
        x_boot, y_boot = x_train[boot_index], y_train[boot_index]
        model.fit(x_boot,y_boot)
        y_preds = model.predict(x_test).reshape(-1,)

        p_cor, _ = pearsonr(y_true, y_preds)
        sp_cor, _ = spearmanr(y_true, y_preds)
        scores['mae'].append(mean_absolute_error(y_true, y_preds))
        scores['rmse'].append(np.sqrt(mean_squared_error(y_true, y_preds)))
        scores['r2'].append(r2_score(y_true, y_preds))
        scores['pearson'].append(p_cor)
        scores['spearman'].append(sp_cor)

    boot_result = {}
    for name, values in scores.items():
        boot_result[name + '_result'] = _summarize_bootstrap(values, times)
    return boot_result

def _returnRow(list):
    return ';'.join([str(i) for i in list])

# Print results
def fillTable(boot_result, digit = 4):
    '''
    Print one ';'-separated table row with the mean and the CI of every metric.
    The row holds, for MAE, RMSE, R2, Pearson and Spearman in that order, the
    rounded mean followed by '[low,high]'.
    :param boot_result: dictionary with a '<metric>_result' entry per metric, each
                        holding 'mean' and 'CI' ([low, high])
    :param digit: number of decimals passed to round()
    '''
    metric_keys = ('mae_result', 'rmse_result', 'r2_result', 'pearson_result', 'spearman_result')
    one_row = []
    for key in metric_keys:
        entry = boot_result[key]
        ci_low, ci_high = entry['CI'][0], entry['CI'][1]
        one_row.append(str(round(entry['mean'],digit)))
        one_row.append('['+str(round(ci_low,digit))+','+str(round(ci_high,digit))+']')
    print(_returnRow(one_row))

def saveresult(boot_result,file):
    '''
    Write the raw bootstrap values of every metric to its own .npy file.
    :param boot_result: dictionary with a '<metric>_result' entry per metric, each
                        holding the raw values under 'result'
    :param file: path prefix; the metric file name is appended to it directly
    '''
    targets = (
        ('mae.npy', 'mae_result'),
        ('rmse.npy', 'rmse_result'),
        ('r2.npy', 'r2_result'),
        ('pearson.npy', 'pearson_result'),
        ('spearman.npy', 'spearman_result'),
    )
    for suffix, key in targets:
        raw_values = boot_result[key]['result']
        np.save(file+suffix, raw_values)

In [None]:
#Dataset 1, bootstrapping a linear regression per metric against the MPS95 target
from sklearn.model_selection import train_test_split
#Load Y
dataset_MPS95 = np.load('Lab impact_MPS95.npy')
#Load X
metrics  = ['BAM','BrIC','CP','GAMBIT','HIC','HIP','PRHIC','RIC','SI','PCS','lin_acc_CG_max','ang_vel_max','ang_acc_max','Damage_C','RVCI','KLC','BRIC','CIBIC']
# Read every .mat file once up front instead of once per metric.
impact1_mat = loadmat('Lab impact1_BIC.mat')
impact2_mat = loadmat('Lab impact2_BIC.mat')
impact3_mat = loadmat('Lab impact3_BIC.mat')
for metric in metrics:
    HM1_metric = impact1_mat[metric]
    HM2_metric = impact2_mat[metric]
    NFL53_metric = impact3_mat[metric]
    # np.vstack replaces np.row_stack, which is a deprecated alias of it.
    dataset_metric = np.vstack((HM1_metric,HM2_metric, NFL53_metric))
    Y = dataset_MPS95
    X = dataset_metric

    # Fixed seed so every metric is evaluated on the same train/test split.
    x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size = 0.2, random_state=9001)
    lr = LinearRegression()
    boot_result = bootstrap_reg(lr,x_train,y_train,x_test,y_test, times = 100)
    saveresult(boot_result,'dataset1_MPS95_'+metric)
    fillTable(boot_result,digit = 4)

In [None]:
#Dataset 1, bootstrapping a linear regression per metric against the MPSCC95 target
from sklearn.model_selection import train_test_split
#Load Y
dataset_MPSCC95 = np.load('Lab impact_MPSCC95.npy')
#Load X
metrics  = ['BAM','BrIC','CP','GAMBIT','HIC','HIP','PRHIC','RIC','SI','PCS','lin_acc_CG_max','ang_vel_max','ang_acc_max','Damage_C','RVCI','KLC','BRIC','CIBIC']
# Read every .mat file once up front instead of once per metric.
impact1_mat = loadmat('Lab impact1_BIC.mat')
impact2_mat = loadmat('Lab impact2_BIC.mat')
impact3_mat = loadmat('Lab impact3_BIC.mat')
for metric in metrics:
    HM1_metric = impact1_mat[metric]
    HM2_metric = impact2_mat[metric]
    NFL53_metric = impact3_mat[metric]
    # np.vstack replaces np.row_stack, which is a deprecated alias of it.
    dataset_metric = np.vstack((HM1_metric,HM2_metric, NFL53_metric))
    Y = dataset_MPSCC95
    X = dataset_metric

    # Fixed seed so every metric is evaluated on the same train/test split.
    x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size = 0.2, random_state=9001)
    lr = LinearRegression()
    boot_result = bootstrap_reg(lr,x_train,y_train,x_test,y_test, times = 100)
    saveresult(boot_result,'dataset1_MPSCC95_'+metric)
    fillTable(boot_result,digit = 4)

In [None]:
#Dataset 1, bootstrapping a linear regression per metric against the CSDM target
from sklearn.model_selection import train_test_split
#Load Y
dataset_CSDM = np.load('Lab impact_CSDM.npy')
#Load X
metrics  = ['BAM','BrIC','CP','GAMBIT','HIC','HIP','PRHIC','RIC','SI','PCS','lin_acc_CG_max','ang_vel_max','ang_acc_max','Damage_C','RVCI','KLC','BRIC','CIBIC']
# Read every .mat file once up front instead of once per metric.
impact1_mat = loadmat('Lab impact1_BIC.mat')
impact2_mat = loadmat('Lab impact2_BIC.mat')
impact3_mat = loadmat('Lab impact3_BIC.mat')
for metric in metrics:
    HM1_metric = impact1_mat[metric]
    HM2_metric = impact2_mat[metric]
    NFL53_metric = impact3_mat[metric]
    # This line used to be fused with a second statement,
    #   dataset_metric = np.load('dataset1_'+metric+'.npy')
    # which made the cell a SyntaxError. The stacked .mat data is kept so this cell
    # builds X the same way as the other Dataset 1 cells.
    # NOTE(review): confirm the cached 'dataset1_<metric>.npy' files were not the intended input.
    # np.vstack replaces np.row_stack, which is a deprecated alias of it.
    dataset_metric = np.vstack((HM1_metric,HM2_metric, NFL53_metric))
    Y = dataset_CSDM
    X = dataset_metric

    # Fixed seed so every metric is evaluated on the same train/test split.
    x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size = 0.2, random_state=9001)
    lr = LinearRegression()
    boot_result = bootstrap_reg(lr,x_train,y_train,x_test,y_test, times = 100)
    saveresult(boot_result,'dataset1_CSDM_'+metric)
    fillTable(boot_result,digit = 4)

In [None]:
###---Similar code for CF/MMA/NHTSA/NASCAR---###