In [1]:
import sys
# Put the parent directory on the module search path.
# NOTE(review): presumably this is what lets the `models.*` imports in the
# next cell resolve when the notebook runs from a sub-folder — confirm
# against the repository layout.
sys.path.append('../')

In [2]:
import itertools
import pandas as pd
import numpy as np
from tqdm.auto import tqdm, trange
from pathlib import Path

from models.train import test as test_clas
from models.train_reg import test as test_reg
from models.utils import ContagionDataset, set_seed, ConfusionMatrix, save_pickle, load_pickle
from sklearn.metrics import matthews_corrcoef, mean_squared_error
from models.results import ResultCollection

Using backend: pytorch


In [3]:
# Seed handed to set_seed() before every dataset is built.
seed = 4444

# Metric keys: the validation and test variants of `mcc`.
metric_filter_1, metric_filter_2 = 'val_mcc', 'test_mcc'

# Naming conventions used to locate the data folder and the saved-model folders.
data_dir_name, save_path_name = 'data', 'saved_'
networks = ['sym_network', 'europe_network']

# Target name forwarded to ContagionDataset.
target = 'additional_stress'

# Split fractions, looked up by the last token of a `models_<type>_<sets>`
# folder name.
# NOTE(review): the tuple order is presumably (train, val, test) — confirm
# against ContagionDataset's `sets_lengths` parameter.
dict_sets_lengths = {
    '75': (0.5, 0.25, 0.25),
    '40': (0.3, 0.1, 0.6),
    '10': (0.07, 0.03, 0.9),
}

# Test entry point, looked up by the middle token of the same folder name.
dict_test_type = dict(clas=test_clas, reg=test_reg)


Get all models that will be tested

In [4]:
# One list of saved-model directories per entry of `networks`, matching
# `<network>/models*/<save_path_name>*`.
# The matches are sorted because Path.glob yields entries in a
# filesystem-dependent order; sorting keeps the evaluation order (and the
# order of rows added to each ResultCollection) the same on every machine.
network_paths = [
    sorted(k for k in Path(n).glob(f"models*/{save_path_name}*") if k.is_dir())
    for n in networks
]

In [6]:
# Evaluate every saved model of every network, keeping one ResultCollection
# per network in `col_results` (keyed by network name).
col_results = {}

# Arguments that are identical for every test call.
common_test_kwargs = dict(
    n_runs=1,
    debug_mode=False,
    use_cpu=False,
    save=True,
    use_edge_weight=True,
)

for network_name, paths in zip(networks, network_paths):
    col = col_results[network_name] = ResultCollection()

    for model_dir in paths:
        print(model_dir)

        # Folder layout: <network>/models_<test_type>_<sets_type>/saved_<model_name>
        model_name = model_dir.name.split(save_path_name)[1]
        experiment_dir = model_dir.parent
        _, test_type, sets_type = experiment_dir.name.split('_')
        split_fractions = dict_sets_lengths[sets_type]
        data_dir = experiment_dir.parent / data_dir_name

        # Re-seed before each dataset build so every model starts from the same seed.
        set_seed(seed)
        dataset_val = ContagionDataset(
            raw_dir=data_dir,
            drop_edges=0,
            sets_lengths=split_fractions,
            target=target,
            add_self_loop=True,
        )

        # Pick the classification or regression test routine for this folder.
        run_test = dict_test_type[test_type]
        test_output = run_test(
            dataset=dataset_val,
            save_path=str(model_dir),
            **common_test_kwargs,
        )

        # Only the third element of the test output is stored.
        # NOTE(review): presumably the metrics/result payload — confirm
        # against models.train.test / models.train_reg.test.
        result = col.add(test_output[2], f"{experiment_dir.name}_{model_name}")


sym_network\models_clas_10\saved_fnn


100%|██████████| 743/743 [00:29<00:00, 25.58it/s]


sym_network\models_clas_10\saved_gat


In [None]:
# col_results = load_pickle('results.pickle')

In [None]:
# Build one `df('test_mcc', True)` table per network, keyed by network name.
# NOTE(review): the meaning of the second positional argument is defined by
# ResultCollection.df — confirm there.
r = {}
for network_name, collection in col_results.items():
    r[network_name] = collection.df('test_mcc', True)
r

In [None]:
# Show the sym_network table ordered by test_mcc, highest first, with
# missing values at the bottom.
sym_network_table = r['sym_network']
sym_network_table.sort_values(by='test_mcc', axis=0, ascending=False, na_position='last')

# Separate networks

In [None]:
# Single-model run: evaluates only the first entry of `paths` and adds it to
# `col`, leaving the outcome in `result` for the inspection cells that follow.
# `paths` and `col` are not defined in this cell; they are whatever the
# per-network evaluation loop left bound on its last iteration.
for p in paths[:1]:
    # Folder layout: <network>/models_<test_type>_<sets_type>/saved_<name>
    name = p.name.split(save_path_name)[1]
    folder_tokens = p.parent.name.split('_')
    test_type, sets_type = folder_tokens[1], folder_tokens[2]
    sets_lengths = dict_sets_lengths[sets_type]
    data_dir = p.parent.parent.joinpath(data_dir_name)

    set_seed(seed)
    dataset_val = ContagionDataset(
        raw_dir=data_dir,
        drop_edges=0,
        sets_lengths=sets_lengths,
        target=target,
        add_self_loop=True,
    )

    # No bare `test` exists in this notebook (the imports are aliased to
    # test_clas / test_reg), so dispatch on the folder's test type exactly as
    # the per-network loop does.
    r = dict_test_type[test_type](
        dataset=dataset_val,
        save_path=str(p),
        n_runs=1,
        debug_mode=False,
        use_cpu=False,
        save=True,
        use_edge_weight=True,
    )

    result = col.add(r[2], f"{p.parent.name}_{name}")

In [None]:
# List the keys available on the first entry stored in `result`.
first_entry = result.data[0]
first_entry.keys()

In [None]:
# NOTE(review): presumably displays the metrics table of `result` ordered by
# test_mcc — confirm against the df_metrics_sort implementation.
result.df_metrics_sort('test_mcc')

In [None]:
# Show the `path_name` recorded under the `dict` entry of the first stored item.
first_entry = result.data[0]
first_entry['dict']['path_name']

In [None]:
# NOTE(review): presumably writes the entry with the best test_mcc into the
# current directory ('.') — confirm against the save_best implementation.
result.save_best('test_mcc', '.',)