In [34]:
%load_ext autoreload
%autoreload 2
import pickle
import os

import numpy as np
import pandas as pd

import sys
sys.path.append('src')

from result_utils import get_cnn_result
from hybrid_utils import run_svc, run_cascade, run_and, run_or, run_avg

# Input directory: "five_fold.pickle" is read from here, and the path is also
# passed to run_svc / run_cascade.
DATA_DIR = "./data"
# Passed to get_cnn_result and to the hybrid helpers (run_cascade, run_and,
# run_or, run_avg); presumably where the per-fold CNN outputs live — confirm.
CNN_DIR = "./result/cnn/"
# Destination directory for the aggregated CSV summaries written by this notebook.
RESULT_DIR = "./result/"


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### CNN Performance

In [11]:
# Aggregate CNN results over the hyper-parameter settings that were explored.
# Every call uses folds=5; only gamma, resolution and learning rate vary.
#
# The per-setting frames are collected in a list and concatenated exactly once:
# calling pd.concat inside the loop re-copies every previously accumulated row
# on each iteration, which is needlessly quadratic.

# Reference runs outside the gamma/learning-rate grid below.
result_non_focal = get_cnn_result(cnn_dir=CNN_DIR,
                                  folds=5,
                                  gamma=0,
                                  resolution=0.25,
                                  learning_rate=0.001)

result_res_64 = get_cnn_result(cnn_dir=CNN_DIR,
                               folds=5,
                               gamma=2,
                               resolution=0.125,
                               learning_rate=0.001)

result_res_256 = get_cnn_result(cnn_dir=CNN_DIR,
                                folds=5,
                                gamma=2,
                                resolution=0.5,
                                learning_rate=0.001)

# Row order of the final frame follows the order of this list.
cnn_result_frames = [result_non_focal, result_res_64, result_res_256]

# Grid over gamma and learning rate at resolution=0.25.
for gamma in [2, 3, 4, 5]:
    for lr in [0.005, 0.001, 0.0005]:
        temp_result = get_cnn_result(cnn_dir=CNN_DIR,
                                     folds=5,
                                     gamma=gamma,
                                     resolution=0.25,
                                     learning_rate=lr)
        cnn_result_frames.append(temp_result)

# Single concatenation of all collected frames.
combined_results = pd.concat(cnn_result_frames)

# Persist the aggregated results (index is not written).
combined_results.to_csv(os.path.join(RESULT_DIR, "result_cnn.csv"), index=False)

# Show the first rows as the cell output.
combined_results.head()

Unnamed: 0,Res,Gamma,Lr,Group,Accuracy,Threshold,Balanced Accuracy,Phi Coefficient,Sensitivity,Specificity,ROC,ROC Boot Mean,ROC Boot CI 2.5,ROC Boot CI 97.5,ROC Boot p,Conf,TPR,FPR
0,0.25,0,0.001,Test,0.564,[ inf 9.9100870e-01 9.6703601e-01 9.6...,0.5187,0.0414,0.2899,0.7476,0.4967,0.4971,0.4049,0.5849,0.0889,[[77 26]\n [49 20]],[0. 0.01449275 0.01449275 0.02898551 0...,[0. 0. 0.02912621 0.02912621 0...
0,0.25,0,0.001,Valid,0.55,[ inf 9.8912650e-01 9.4855106e-01 9.2...,0.5011,0.0023,0.2727,0.7294,0.5519,0.5519,0.4523,0.6519,0.0,[[62 23]\n [40 15]],[0. 0.01818182 0.05454545 0.05454545 0...,[0. 0. 0. 0.02352941 0...
0,0.25,0,0.001,Train,0.9872,[ inf 9.9980193e-01 4.5676276e-01 3.4...,0.9842,0.9737,0.9683,1.0,0.9997,0.9997,0.999,1.0,0.0,[[327 0]\n [ 7 214]],[0. 0.00452489 0.99095023 0.99095023 0...,[0. 0. 0. 0.01223242 0...
0,0.125,2,0.001,Test,0.5407,[ inf 0.81100833 0.7807745 0.7440585 0...,0.5089,0.0184,0.3478,0.6699,0.485,0.4853,0.3993,0.5783,0.0,[[69 34]\n [45 24]],[0. 0. 0.02898551 0.02898551 0...,[0. 0.00970874 0.00970874 0.04854369 0...
0,0.125,2,0.001,Valid,0.5143,[ inf 0.81499946 0.7833994 0.766371 0...,0.4749,-0.0526,0.2909,0.6588,0.4573,0.4562,0.3615,0.5549,0.0,[[56 29]\n [39 16]],[0. 0. 0. 0.01818182 0...,[0. 0.01176471 0.02352941 0.02352941 0...


### SVM Performance

In [20]:
# The pickle file stores two objects back to back; read them in order from the
# same handle. NOTE: pickle.load can execute arbitrary code, so only load
# files from a trusted source.
with open(os.path.join(DATA_DIR, "five_fold.pickle"), "rb") as handle:
    fold_file, fold_group = pickle.load(handle), pickle.load(handle)

In [25]:
# Call the project helper run_svc (from hybrid_utils) with the data directory
# and the fold object loaded from five_fold.pickle. It returns two values:
# df_metrics_svm, later concatenated into df_hybrid, and y_preds_svm_prob,
# which is forwarded to run_and / run_or / run_avg.
# NOTE(review): the name suggests per-sample predicted probabilities — confirm
# in hybrid_utils.
df_metrics_svm, y_preds_svm_prob = run_svc(DATA_DIR=DATA_DIR,
                                           fold_file=fold_file)

Balanced Accuracy: 0.5303


### Hybrid cascade

In [26]:
# Call the project helper run_cascade (from hybrid_utils) with the data
# directory, the CNN directory and the fold object. It returns three values:
# df_metrics_cas, later concatenated into df_hybrid, plus y_tests and coefs,
# which are both forwarded to run_and / run_or / run_avg.
# NOTE(review): the exact contents of y_tests and coefs are not visible here —
# confirm in hybrid_utils.
df_metrics_cas, y_tests, coefs = run_cascade(DATA_DIR=DATA_DIR,
                                             CNN_DIR=CNN_DIR,
                                             fold_file=fold_file)

Balanced Accuracy: 0.6127


### Hybrid and-voting

In [29]:
# Call the project helper run_and with the CNN directory, the values returned
# by run_svc (y_preds_svm_prob) and by run_cascade (y_tests, coefs). The
# result is later concatenated into df_hybrid.
# NOTE(review): presumably combines the CNN and SVM decisions with a logical
# AND, per the section title — confirm in hybrid_utils.
df_metrics_and = run_and(CNN_DIR=CNN_DIR,
                         y_preds_svm_prob=y_preds_svm_prob,
                         y_tests=y_tests, 
                         coefs=coefs)

Balanced Accuracy: 0.5000


### Hybrid or-voting

In [32]:
# Call the project helper run_or with the CNN directory, the values returned
# by run_svc (y_preds_svm_prob) and by run_cascade (y_tests, coefs). The
# result is later concatenated into df_hybrid.
# NOTE(review): presumably combines the CNN and SVM decisions with a logical
# OR, per the section title — confirm in hybrid_utils.
df_metrics_or = run_or(CNN_DIR=CNN_DIR,
                       y_preds_svm_prob=y_preds_svm_prob,
                       y_tests=y_tests, 
                       coefs=coefs)

Balanced Accuracy: 0.6054


### Hybrid avg-voting

In [35]:
# Call the project helper run_avg with the CNN directory, the values returned
# by run_svc (y_preds_svm_prob) and by run_cascade (y_tests, coefs). The
# result is later concatenated into df_hybrid.
# NOTE(review): presumably averages the CNN and SVM scores, per the section
# title — confirm in hybrid_utils.
df_metrics_avg = run_avg(CNN_DIR=CNN_DIR,
                         y_preds_svm_prob=y_preds_svm_prob,
                         y_tests=y_tests, 
                         coefs=coefs)

Balanced Accuracy: 0.5019


### Combine all results

In [36]:
# Stack the metrics of the SVM and the four hybrid schemes, in that order,
# into one frame.
df_hybrid = pd.concat(
    objs=[
        df_metrics_svm,
        df_metrics_cas,
        df_metrics_and,
        df_metrics_or,
        df_metrics_avg,
    ]
)

# Write the combined metrics to disk without the index column.
df_hybrid.to_csv(
    path_or_buf=os.path.join(RESULT_DIR, "result_hybrid.csv"),
    index=False,
)

### Repeat with LAA

In [39]:
# LAA variant of the pipeline run in the cells above: run_svc and run_cascade
# are invoked with LAA=True and their outputs feed the three voting helpers.
# NOTE: this cell rebinds df_metrics_*, y_preds_svm_prob, y_tests, coefs and
# df_hybrid, replacing the values produced by the earlier non-LAA cells.
df_metrics_svm, y_preds_svm_prob = run_svc(
    DATA_DIR=DATA_DIR,
    fold_file=fold_file,
    LAA=True,
)

df_metrics_cas, y_tests, coefs = run_cascade(
    DATA_DIR=DATA_DIR,
    CNN_DIR=CNN_DIR,
    fold_file=fold_file,
    LAA=True,
)

# The voting helpers all take the same arguments; they are evaluated lazily in
# the order and, or, avg as the tuple on the left is unpacked.
df_metrics_and, df_metrics_or, df_metrics_avg = (
    vote(
        CNN_DIR=CNN_DIR,
        y_preds_svm_prob=y_preds_svm_prob,
        y_tests=y_tests,
        coefs=coefs,
    )
    for vote in (run_and, run_or, run_avg)
)

# Stack the five metric frames in the same order as the non-LAA summary.
df_hybrid = pd.concat(
    [
        df_metrics_svm,
        df_metrics_cas,
        df_metrics_and,
        df_metrics_or,
        df_metrics_avg,
    ]
)

# Write the LAA metrics to their own CSV without the index column.
df_hybrid.to_csv(os.path.join(RESULT_DIR, "result_LAA.csv"), index=False)

Balanced Accuracy: 0.4673
Balanced Accuracy: 0.6079
Balanced Accuracy: 0.4951
Balanced Accuracy: 0.6102
Balanced Accuracy: 0.4970
