Notebook to read the results of all experiments

In [9]:
import pickle
import pandas as pd
from fnmatch import fnmatch
import os

In [10]:
def read_experiment(model_name, target_column, max_time, is_homogenous_dataset=False, experiment_name=None):
    """Read a single experiment (model and AUC statistics) from file.

    The experiment directory is built as
    ``results/<dataset>/<target_column>/<max_time>/<model_name>/<experiment>``,
    where ``<dataset>`` is ``homogenous_data`` or ``non_homogenous_data`` and
    ``<experiment>`` falls back to ``defaultExperiment``.

    Args:
        model_name: Name of the model sub-directory.
        target_column: Name of the target-column sub-directory.
        max_time: Name of the sub-directory below ``target_column``
            (interpolated into the path as-is).
        is_homogenous_dataset: Selects ``results/homogenous_data`` when true,
            ``results/non_homogenous_data`` otherwise.
        experiment_name: Experiment sub-directory; ``None`` selects
            ``defaultExperiment``.

    Returns:
        A ``(model, auc_stats)`` tuple holding the objects unpickled from
        ``model.sav`` and ``auc_statistics.pkl`` respectively.

    Raises:
        FileNotFoundError: If either file is missing (raised by ``open``).
    """
    dataset_dir = 'results/homogenous_data' if is_homogenous_dataset else 'results/non_homogenous_data'
    experiment_dir_name = 'defaultExperiment' if experiment_name is None else experiment_name
    experiment_dir = f'{dataset_dir}/{target_column}/{max_time}/{model_name}/{experiment_dir_name}'

    def _unpickle(file_name):
        # NOTE: pickle.load can execute arbitrary code; only read trusted result files.
        with open(f'{experiment_dir}/{file_name}', 'rb') as handle:
            return pickle.load(handle)

    # The model is read before the statistics, so a missing model file is reported first.
    model = _unpickle('model.sav')
    auc_stats = _unpickle('auc_statistics.pkl')
    return model, auc_stats

In [11]:
# read all auc statistics and save as csv
def collect_auc_statistics(root_dir, name_pattern="*_statistics.pkl"):
    """Gather every pickled statistics table below ``root_dir`` into one DataFrame.

    Files are expected at
    ``<root_dir>/<target_column>/<months_trained>/<model_name>/<experiment_name>/<file>``.
    The four directory names, plus the name of ``root_dir`` itself (stored as
    ``dataset_type``), are attached to each table as string columns.

    Args:
        root_dir: Dataset directory to walk, e.g. ``results/non_homogenous_data``.
        name_pattern: ``fnmatch`` pattern that selects the statistics files.

    Returns:
        The row-wise concatenation of all tables found, with each table's
        original index kept. An empty DataFrame if nothing matches.

    Raises:
        ValueError: If a matching file lies less than four directory levels
            below ``root_dir``, so its labels cannot be derived.
    """
    root_dir = os.path.normpath(root_dir)
    dataset_type = os.path.basename(root_dir)

    frames = []
    for dir_path, subdirs, files in os.walk(root_dir):
        # Sorting in place makes os.walk visit directories in a reproducible order.
        subdirs.sort()
        for name in sorted(files):
            if not fnmatch(name, name_pattern):
                continue
            filename = os.path.join(dir_path, name)

            # Derive the labels from the path relative to the root with the
            # platform separator, instead of splitting on a hard-coded '\\'.
            levels = os.path.relpath(dir_path, root_dir).split(os.sep)
            if len(levels) < 4:
                raise ValueError(
                    f"Unexpected location for statistics file {filename!r}: expected "
                    "<target_column>/<months_trained>/<model_name>/<experiment_name>/ below the root"
                )
            target_column, months_trained, model_name, experiment_name = levels[:4]

            # NOTE: pickle.load can execute arbitrary code; only read trusted result files.
            with open(filename, "rb") as stats_file:
                stats = pickle.load(stats_file)

            frames.append(
                stats.assign(
                    dataset_type=dataset_type,
                    target_column=target_column,
                    months_trained=months_trained,
                    model_name=model_name,
                    experiment_name=experiment_name,
                )
            )

    # pd.concat rejects an empty list; return an empty frame when nothing was found.
    if not frames:
        return pd.DataFrame()
    # Concatenate once instead of growing the DataFrame inside the loop.
    return pd.concat(frames)


root = os.path.join('results', 'non_homogenous_data')
pattern = "*_statistics.pkl"
all_auc_statistics = collect_auc_statistics(root, pattern)

all_auc_statistics.to_csv('all_auc_statistics.csv', index=False)

In [12]:
# Bare last expression: show the combined statistics table as the cell's output.
all_auc_statistics

Unnamed: 0,month,train_auc,test_auc,dataset_type,target_column,months_trained,model_name,experiment_name
0,6,0.945358,0.939377,non_homogenous_data,bcr,108,LogisticRegression,defaultExperiment
1,12,0.909996,0.888244,non_homogenous_data,bcr,108,LogisticRegression,defaultExperiment
2,18,0.891807,0.878229,non_homogenous_data,bcr,108,LogisticRegression,defaultExperiment
3,24,0.888124,0.863556,non_homogenous_data,bcr,108,LogisticRegression,defaultExperiment
4,30,0.876179,0.864646,non_homogenous_data,bcr,108,LogisticRegression,defaultExperiment
...,...,...,...,...,...,...,...,...
10,66,0.998917,0.933333,non_homogenous_data,mts,96,XGBClassifier,Experiment 001
11,72,0.998503,0.933781,non_homogenous_data,mts,96,XGBClassifier,Experiment 001
12,78,0.998088,0.935347,non_homogenous_data,mts,96,XGBClassifier,Experiment 001
13,84,0.997699,0.935347,non_homogenous_data,mts,96,XGBClassifier,Experiment 001
