In [1]:
%matplotlib inline

In [2]:
import pickle
from collections import defaultdict

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
from joblib import Parallel, delayed
from networkx.algorithms.approximation import clique
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import Normalizer
from tqdm.notebook import tqdm

from utils import SBM_Data, Datasets_Data, load_or_calc_and_save, ytrue_to_partition, calc_avranks, RFE, RFE_LOO

# LOO: feature selection with RFE_LOO on the benchmark datasets

In [3]:
# Load the precalculated results for the benchmark datasets and turn them into a feature matrix.
datasets_data_hub = Datasets_Data()
_, datasets_results_modularity_any3, datasets_modularity_results = datasets_data_hub.load_precalculated()
X_val, y_val, _, _, _, _, feature_names = datasets_data_hub.make_dataset(datasets_results_modularity_any3)

# Rescale every column except the trailing 25 (those are left untouched).
# NOTE(review): the last 25 columns are presumably categorical/indicator features - confirm in utils.
# Columns flagged in `features_to_log` are centred and scaled; all others are only divided by
# their standard deviation. Statistics come from X_val itself in this cell.
logged_mask = np.array(datasets_data_hub.features_to_log)[datasets_data_hub.allowed_features]
for col in range(len(feature_names[:-25])):
    column = X_val[:, col]
    scale = np.std(column)  # taken from the raw column, before it is overwritten below
    if logged_mask[col]:
        column = column - np.mean(column)
    X_val[:, col] = column / scale

100%|██████████| 24/24 [00:00<00:00, 1891.46it/s]

wrapper: cache file ../../cache/cache/feature_importance/cora_DB.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/cora_EC.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/cora_HA.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/cora_HCI.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/cora_IR.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/cora_Net.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/dolphins.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/eu-core.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/eurosis.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/football.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance




In [4]:
# Run the RFE_LOO feature selector (from utils) over the benchmark-dataset features,
# using logistic regression as the underlying estimator.
# NOTE: the recorded run of this cell took roughly six hours in total (see the progress
# bars in the output), so do not re-run it casually.
estimator = LogisticRegression(max_iter=10000)  # generous iteration cap, presumably to ensure convergence
selector = RFE_LOO(estimator, feature_names).fit(X_val, y_val)

  0%|          | 0/18 [00:00<?, ?it/s]

all features, acc=0.871, f1=0.062


100%|██████████| 18/18 [00:50<00:00,  2.81s/it]
  0%|          | 0/153 [00:00<?, ?it/s]

1 features, set=('avg_deg',) acc=0.891, f1=0.039


100%|██████████| 153/153 [08:37<00:00,  3.38s/it]
  0%|          | 0/816 [00:00<?, ?it/s]

2 features, set=('avg(deg | deg > avg_deg)', 'median_deg') acc=0.898, f1=0.048


100%|██████████| 816/816 [1:06:42<00:00,  4.90s/it]
  0%|          | 0/3060 [00:00<?, ?it/s]

3 features, set=('median_deg', 'avg_sp', 'median_sp') acc=0.927, f1=0.061


100%|██████████| 3060/3060 [4:55:43<00:00,  5.80s/it]  

4 features, set=('modularity', 'median_deg', 'avg_sp', 'median_sp') acc=0.919, f1=0.064





# Final

In [5]:
# Training data: precalculated results from the SBM data hub.
sbm_data_hub = SBM_Data()
_, sbm_results_modularity_any3, sbm_modularity_results = sbm_data_hub.load_precalculated()
X_train, y_train, _, _, _, _, feature_names = sbm_data_hub.make_dataset(sbm_results_modularity_any3)

# Validation data: precalculated results from the benchmark datasets hub.
# Note that `feature_names` is rebound here to the names returned for the validation set.
datasets_data_hub = Datasets_Data()
_, datasets_results_modularity_any3, datasets_modularity_results = datasets_data_hub.load_precalculated()
X_val, y_val, _, _, _, _, feature_names = datasets_data_hub.make_dataset(datasets_results_modularity_any3)

# Rescale every column except the trailing 25 (those are left untouched).
# NOTE(review): the last 25 columns are presumably categorical/indicator features - confirm in utils.
# Columns flagged in `features_to_log` are centred and scaled; all others are only divided by
# the standard deviation. Both matrices are transformed with the TRAINING statistics.
for i, name in enumerate(feature_names[:-25]):
    was_logged = np.array(sbm_data_hub.features_to_log)[sbm_data_hub.allowed_features][i]
    # Capture the statistics of the raw training column BEFORE overwriting it. Previously the
    # validation line recomputed mean/std from the already-scaled X_train column (mean ~0,
    # std ~1), which left X_val effectively unscaled and inconsistent with X_train.
    train_mean = np.mean(X_train[:, i])
    train_std = np.std(X_train[:, i])
    X_train[:, i] = ((X_train[:, i] - train_mean) if was_logged else X_train[:, i]) / train_std
    X_val[:, i] = ((X_val[:, i] - train_mean) if was_logged else X_val[:, i]) / train_std

  6%|▌         | 4/69 [00:00<00:01, 35.93it/s]

wrapper: cache file ../../cache/cache/feature_importance/100_2_0.05_0.001.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/100_2_0.05_0.002.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/100_2_0.05_0.005.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/100_2_0.05_0.007.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/100_2_0.05_0.010.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/100_2_0.05_0.020.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/100_2_0.05_0.030.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/100_2_0.05_0.050.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/100_2_0.10_0.001.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/100_2_0.10_0.002

 42%|████▏     | 29/69 [00:00<00:00, 55.41it/s]

wrapper: cache file ../../cache/cache/feature_importance/100_2_0.10_0.001_1.00.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/100_2_0.10_0.005_1.00.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/100_2_0.10_0.010_1.00.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/100_2_0.15_0.010.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/100_2_0.15_0.030.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/100_2_0.15_0.050.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/100_2_0.15_0.070.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/100_2_0.15_0.100.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/100_2_0.15_0.150.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/1

 72%|███████▏  | 50/69 [00:00<00:00, 70.58it/s]

wrapper: cache file ../../cache/cache/feature_importance/102_3_0.30_0.100.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/102_3_0.30_0.150.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/100_4_0.10_0.001.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/100_4_0.10_0.005.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/100_4_0.10_0.010.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/100_4_0.10_0.020.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/100_4_0.10_0.050.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/100_4_0.10_0.100.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/100_4_0.30_0.100.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/100_4_0.30_0.150

100%|██████████| 69/69 [00:00<00:00, 89.21it/s]

wrapper: cache file ../../cache/cache/feature_importance/200_2_0.30_0.150.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/201_3_0.30_0.100.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/200_4_0.10_0.001.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/200_4_0.10_0.005.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/200_4_0.10_0.010.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/200_4_0.10_0.020.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/200_4_0.10_0.050.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/200_4_0.10_0.100.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/200_4_0.30_0.100.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/200_4_0.30_0.150


100%|██████████| 24/24 [00:00<00:00, 168.04it/s]


wrapper: cache file ../../cache/cache/feature_importance/cora_DB.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/cora_EC.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/cora_HA.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/cora_HCI.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/cora_IR.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/cora_Net.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/dolphins.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/eu-core.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/eurosis.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance/football.pkl found! Skip calculations
wrapper: cache file ../../cache/cache/feature_importance

In [6]:
# Sanity check on the validation matrix size: number of rows per group of 25 * 7.
# The recorded result (24.0) matches the 24 items shown by the dataset-loading progress bar.
# NOTE(review): 25 and 7 are presumably the per-dataset counts of two enumerated factors - confirm in utils.
len(X_val) / 25 / 7

24.0