In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle as pkl
import re

# Fully Connected

In [16]:
# Fully connected without masking
# NOTE: pickle.load can execute arbitrary code stored in the file; only load result files you trust.
with open('./modified_XGW-GAT/save_results_tuning_fc.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return all numbers found in ``s`` as floats, in order of appearance.

    A number is an optional sign, then an integer or decimal (``7``, ``0.63``,
    ``.5``), then an optional exponent (``1e-05``). The alternation is grouped
    so the sign binds to integers too; ungrouped, ``-5`` parsed as ``5.0`` and
    ``1e-05`` as two separate numbers.

    Args:
        s: Text to scan.

    Returns:
        List of floats; empty when ``s`` contains no digits.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)(?:[eE][-+]?\d+)?", s)]

# Flatten each results entry into one row: the fields of the key first,
# then every number parsed from that entry's result strings.
formatted_data = [
    [*key, *(number for value in values for number in extract_numbers(value))]
    for key, values in results.items()
]

# Column labels: one per key field, followed by one per parsed number
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC',
                      'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout'] + additional_columns
df_gcn = pd.DataFrame(formatted_data, columns=columns)

# Show the table sorted by the AvgAcc column, highest first
df_gcn.sort_values('AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,AvgAcc,StdAcc,AvgAUC,StdAUC,MaxAcc,MaxAUC,TreatmentAcc,TreatmentAUC,ScannerAcc,ScannerAUC
21,4,,,64,,85.44,0.63,55.5,0.28,86.08,55.78,0.789104,0.601247,0.888889,0.71875
5,2,,,64,,85.44,0.0,54.67,1.48,85.44,56.16,0.792619,0.587427,0.888889,0.71875
28,8,,,32,,85.13,0.32,55.7,1.0,85.44,56.7,0.611599,0.633333,0.677778,0.64375
27,8,,,16,,84.81,1.27,58.98,1.96,86.08,60.95,0.822496,0.517806,0.911111,0.64375
30,8,,,128,,84.81,0.63,55.62,2.88,85.44,58.5,0.789104,0.565366,0.9,0.725
22,4,,,128,,84.49,0.32,53.3,0.12,84.81,53.42,0.794376,0.528691,0.911111,0.6875
14,3,,,128,,84.49,1.58,55.65,0.68,86.08,56.33,0.782074,0.581044,0.888889,0.71875
13,3,,,64,,84.49,0.32,52.76,1.02,84.81,53.78,0.794376,0.596465,0.888889,0.71875
12,3,,,32,,84.49,0.32,55.9,0.8,84.81,56.7,0.792619,0.599387,0.888889,0.71875
20,4,,,32,,84.49,0.32,56.27,0.26,84.81,56.53,0.810193,0.538266,0.922222,0.69375


In [17]:
# Fully connected with one layer masking
# NOTE: pickle.load can execute arbitrary code stored in the file; only load result files you trust.
with open('./modified_XGW-GAT/save_results_tuning_fc_masking.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return all numbers found in ``s`` as floats, in order of appearance.

    A number is an optional sign, then an integer or decimal (``7``, ``0.63``,
    ``.5``), then an optional exponent (``1e-05``). The alternation is grouped
    so the sign binds to integers too; ungrouped, ``-5`` parsed as ``5.0`` and
    ``1e-05`` as two separate numbers.

    Args:
        s: Text to scan.

    Returns:
        List of floats; empty when ``s`` contains no digits.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)(?:[eE][-+]?\d+)?", s)]

# Flatten each results entry into one row: the fields of the key first,
# then every number parsed from that entry's result strings.
formatted_data = [
    [*key, *(number for value in values for number in extract_numbers(value))]
    for key, values in results.items()
]

# Column labels: one per key field, followed by one per parsed number
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC',
                      'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout'] + additional_columns
df_gcn = pd.DataFrame(formatted_data, columns=columns)

# Show the table sorted by the AvgAcc column, highest first
df_gcn.sort_values('AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,AvgAcc,StdAcc,AvgAUC,StdAUC,MaxAcc,MaxAUC,TreatmentAcc,TreatmentAUC,ScannerAcc,ScannerAUC
0,2,,,2,,85.44,0.0,57.36,2.47,85.44,59.83,0.720562,0.619557,0.822222,0.68125
26,8,,,8,,85.44,0.0,55.76,0.2,85.44,55.96,0.773286,0.579712,0.877778,0.7125
30,8,,,128,,85.13,0.32,57.34,1.65,85.44,58.99,0.760984,0.588212,0.888889,0.675
5,2,,,64,,85.13,0.32,56.5,0.03,85.44,56.53,0.787346,0.584236,0.922222,0.7375
6,2,,,128,,84.81,0.0,54.1,0.84,84.81,54.94,0.790861,0.574404,0.911111,0.6875
23,4,,,256,,84.81,0.0,53.49,0.81,84.81,54.3,0.794376,0.588491,0.911111,0.6875
22,4,,,128,,84.81,0.0,56.01,1.11,84.81,57.12,0.817223,0.550494,0.922222,0.69375
20,4,,,32,,84.81,0.63,56.41,1.98,85.44,58.4,0.655536,0.588169,0.788889,0.6625
18,4,,,8,,84.81,0.63,57.37,2.15,85.44,59.51,0.752197,0.606813,0.866667,0.70625
14,3,,,128,,84.49,0.95,58.09,0.43,85.44,58.52,0.648506,0.583914,0.8,0.7125


# GCN / GNN

In [13]:
# GNN without masking
# NOTE: pickle.load can execute arbitrary code stored in the file; only load result files you trust.
with open('./modified_XGW-GAT/save_results_tuning_gcn.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return all numbers found in ``s`` as floats, in order of appearance.

    A number is an optional sign, then an integer or decimal (``7``, ``0.63``,
    ``.5``), then an optional exponent (``1e-05``). The alternation is grouped
    so the sign binds to integers too; ungrouped, ``-5`` parsed as ``5.0`` and
    ``1e-05`` as two separate numbers.

    Args:
        s: Text to scan.

    Returns:
        List of floats; empty when ``s`` contains no digits.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)(?:[eE][-+]?\d+)?", s)]

# Flatten each results entry into one row: the fields of the key first,
# then every number parsed from that entry's result strings.
formatted_data = [
    [*key, *(number for value in values for number in extract_numbers(value))]
    for key, values in results.items()
]

# Column labels: one per key field, followed by one per parsed number
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC',
                      'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout'] + additional_columns
df_gcn = pd.DataFrame(formatted_data, columns=columns)

# Show the table sorted by the AvgAcc column, highest first
df_gcn.sort_values('AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,AvgAcc,StdAcc,AvgAUC,StdAUC,MaxAcc,MaxAUC,TreatmentAcc,TreatmentAUC,ScannerAcc,ScannerAUC
8,3,2,2,2,0.5,81.96,2.22,54.86,2.28,84.18,57.13,0.651408,0.561801,0.8,0.625
7,2,3,4,4,0.5,80.7,3.48,58.14,3.39,84.18,61.53,0.77993,0.520094,0.866667,0.6625
3,2,2,4,4,0.5,80.38,0.0,55.35,0.47,80.38,55.81,0.786972,0.576156,0.855556,0.65625
9,3,2,2,4,0.5,80.06,0.95,57.33,0.42,81.01,57.74,0.75,0.525877,0.877778,0.66875
11,3,2,4,4,0.5,80.06,0.32,54.98,1.24,80.38,56.22,0.757042,0.577954,0.911111,0.775
14,3,3,4,2,0.5,79.11,0.63,59.2,0.22,79.75,59.42,0.757042,0.534126,0.888889,0.7625
6,2,3,4,2,0.5,78.8,4.11,53.06,3.06,82.91,56.12,0.746479,0.535698,0.877778,0.66875
15,3,3,4,4,0.5,78.48,2.53,56.83,0.08,81.01,56.9,0.77993,0.496188,0.9,0.725
2,2,2,4,2,0.5,77.85,0.63,59.54,2.9,78.48,62.44,0.751761,0.558818,0.866667,0.6625
12,3,3,2,2,0.5,77.53,2.22,62.14,4.37,79.75,66.51,0.730634,0.573916,0.822222,0.76875


In [21]:
# GNN with one layer masking
# NOTE(review): this reads the *_baseline_masking_l1.pkl file, the same path the
# "L1 regularization" cell later in this notebook reads.
# NOTE: pickle.load can execute arbitrary code stored in the file; only load result files you trust.
with open('./modified_XGW-GAT/save_results_tuning_gcn_baseline_masking_l1.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return all numbers found in ``s`` as floats, in order of appearance.

    A number is an optional sign, then an integer or decimal (``7``, ``0.63``,
    ``.5``), then an optional exponent (``1e-05``). The alternation is grouped
    so the sign binds to integers too; ungrouped, ``-5`` parsed as ``5.0`` and
    ``1e-05`` as two separate numbers.

    Args:
        s: Text to scan.

    Returns:
        List of floats; empty when ``s`` contains no digits.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)(?:[eE][-+]?\d+)?", s)]

# Flatten each results entry into one row: the fields of the key first,
# then every number parsed from that entry's result strings.
formatted_data = [
    [*key, *(number for value in values for number in extract_numbers(value))]
    for key, values in results.items()
]

# Column labels: one per key field, followed by one per parsed number
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC',
                      'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout'] + additional_columns
df_gcn = pd.DataFrame(formatted_data, columns=columns)

# Show the table sorted by the AvgAcc column, highest first
df_gcn.sort_values('AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,AvgAcc,StdAcc,AvgAUC,StdAUC,MaxAcc,MaxAUC,TreatmentAcc,TreatmentAUC,ScannerAcc,ScannerAUC
12,3,3,2,2,0.5,81.65,1.9,59.59,0.73,83.54,60.32,0.771127,0.554608,0.877778,0.66875
9,3,2,2,4,0.5,81.33,0.95,59.39,1.27,82.28,60.66,0.767606,0.516616,0.9,0.68125
8,3,2,2,2,0.5,80.7,2.85,55.41,2.67,83.54,58.08,0.732394,0.523185,0.866667,0.6625
11,3,2,4,4,0.5,80.38,0.63,52.95,0.15,81.01,53.11,0.778169,0.590748,0.9,0.76875
3,2,2,4,4,0.5,79.75,0.0,54.83,2.28,79.75,57.12,0.786972,0.584125,0.877778,0.7125
14,3,3,4,2,0.5,79.43,2.85,56.5,1.71,82.28,58.21,0.762324,0.513418,0.922222,0.78125
13,3,3,2,4,0.5,79.11,1.9,61.53,4.42,81.01,65.95,0.77993,0.563923,0.877778,0.66875
15,3,3,4,4,0.5,78.8,3.48,55.46,1.91,82.28,57.37,0.774648,0.516896,0.877778,0.66875
7,2,3,4,4,0.5,78.48,2.53,61.02,1.13,81.01,62.16,0.767606,0.516616,0.888889,0.71875
6,2,3,4,2,0.5,78.16,4.11,54.54,1.74,82.28,56.28,0.742958,0.529582,0.9,0.68125


In [18]:
# GNN with Masked Autoencoder
# NOTE: pickle.load can execute arbitrary code stored in the file; only load result files you trust.
with open('./modified_XGW-GAT/save_results_tuning_gcn_mae.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return all numbers found in ``s`` as floats, in order of appearance.

    A number is an optional sign, then an integer or decimal (``7``, ``0.63``,
    ``.5``), then an optional exponent (``1e-05``). The alternation is grouped
    so the sign binds to integers too; ungrouped, ``-5`` parsed as ``5.0`` and
    ``1e-05`` as two separate numbers.

    Args:
        s: Text to scan.

    Returns:
        List of floats; empty when ``s`` contains no digits.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)(?:[eE][-+]?\d+)?", s)]

# Flatten each results entry into one row: the fields of the key first,
# then every number parsed from that entry's result strings.
formatted_data = [
    [*key, *(number for value in values for number in extract_numbers(value))]
    for key, values in results.items()
]

# Column labels: one per key field, followed by one per parsed number
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC',
                      'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout'] + additional_columns
df_gcn = pd.DataFrame(formatted_data, columns=columns)

# Show the table sorted by the AvgAcc column, highest first
df_gcn.sort_values('AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,AvgAcc,StdAcc,AvgAUC,StdAUC,MaxAcc,MaxAUC,TreatmentAcc,TreatmentAUC,ScannerAcc,ScannerAUC
1,2,2,2,4,0.5,83.86,1.58,64.32,2.9,85.44,67.23,0.774648,0.528849,0.944444,0.88125
2,2,2,4,2,0.5,82.91,0.63,56.91,2.66,83.54,59.58,0.739437,0.503543,0.855556,0.7
11,3,2,4,4,0.5,82.91,1.27,56.91,1.52,84.18,58.43,0.772887,0.555674,0.877778,0.75625
15,3,3,4,4,0.5,82.91,1.27,55.49,0.17,84.18,55.66,0.799296,0.571665,0.888889,0.71875
14,3,3,4,2,0.5,82.59,0.32,57.57,0.08,82.91,57.65,0.746479,0.535698,0.888889,0.7625
12,3,3,2,2,0.5,81.96,1.58,54.06,0.89,83.54,54.94,0.739437,0.55534,0.844444,0.69375
5,2,3,2,4,0.5,81.33,2.22,62.17,3.65,83.54,65.81,0.764085,0.578234,0.888889,0.80625
3,2,2,4,4,0.5,80.38,0.63,59.93,1.48,81.01,61.41,0.765845,0.547425,0.922222,0.78125
9,3,2,2,4,0.5,80.38,1.27,55.51,2.24,81.65,57.75,0.698944,0.526836,0.855556,0.7
10,3,2,4,2,0.5,80.06,0.95,63.93,0.66,81.01,64.58,0.764085,0.558312,0.911111,0.81875


# GAT

In [19]:
# GAT without masking
# NOTE: pickle.load can execute arbitrary code stored in the file; only load result files you trust.
with open('./modified_XGW-GAT/save_results_tuning_gat.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return all numbers found in ``s`` as floats, in order of appearance.

    A number is an optional sign, then an integer or decimal (``7``, ``0.63``,
    ``.5``), then an optional exponent (``1e-05``). The alternation is grouped
    so the sign binds to integers too; ungrouped, ``-5`` parsed as ``5.0`` and
    ``1e-05`` as two separate numbers.

    Args:
        s: Text to scan.

    Returns:
        List of floats; empty when ``s`` contains no digits.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)(?:[eE][-+]?\d+)?", s)]

# Flatten each results entry into one row: the fields of the key first,
# then every number parsed from that entry's result strings.
formatted_data = [
    [*key, *(number for value in values for number in extract_numbers(value))]
    for key, values in results.items()
]

# Column labels: one per key field, followed by one per parsed number
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC',
                      'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout'] + additional_columns
df_gcn = pd.DataFrame(formatted_data, columns=columns)

# Show the table sorted by the AvgAcc column, highest first
df_gcn.sort_values('AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,AvgAcc,StdAcc,AvgAUC,StdAUC,MaxAcc,MaxAUC,TreatmentAcc,TreatmentAUC,ScannerAcc,ScannerAUC
16,3,2,8,4,0.5,72.78,0.63,57.14,0.78,73.42,57.93,0.71831,0.586375,0.788889,0.53125
11,3,2,2,8,0.5,68.35,11.39,53.45,0.65,79.75,54.1,0.783451,0.530195,0.9,0.68125
5,2,2,4,8,0.5,67.09,8.86,55.91,0.65,75.95,56.56,0.575704,0.575725,0.722222,0.66875
8,2,2,8,8,0.5,67.09,10.76,57.53,1.14,77.85,58.68,0.755282,0.53306,0.866667,0.61875
2,2,2,2,8,0.5,59.81,18.67,58.2,1.77,78.48,59.97,0.491197,0.568381,0.588889,0.725
10,3,2,2,4,0.5,53.8,3.16,55.26,0.03,56.96,55.29,0.452465,0.536958,0.555556,0.6625
17,3,2,8,8,0.5,51.9,11.39,59.32,1.24,63.29,60.56,0.551056,0.5608,0.577778,0.63125
14,3,2,4,8,0.5,48.1,2.53,58.09,1.27,50.63,59.36,0.554577,0.602776,0.6,0.73125
15,3,2,8,2,0.5,47.78,2.85,54.19,3.45,50.63,57.65,0.184859,0.502412,0.111111,0.5
12,3,2,4,2,0.5,47.15,7.91,52.13,0.07,55.06,52.21,0.552817,0.56585,0.7,0.7


# GCN CHOSEN: Baseline masking with L1 vs L2 vs ElasticNet vs K-support norm vs Frobenius norm

In [29]:
# GNN with one layer masking and L1 regularization
# NOTE: pickle.load can execute arbitrary code stored in the file; only load result files you trust.
with open('./modified_XGW-GAT/save_results_tuning_gcn_baseline_masking_l1.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return all numbers found in ``s`` as floats, in order of appearance.

    A number is an optional sign, then an integer or decimal (``7``, ``0.63``,
    ``.5``), then an optional exponent (``1e-05``). The alternation is grouped
    so the sign binds to integers too; ungrouped, ``-5`` parsed as ``5.0`` and
    ``1e-05`` as two separate numbers.

    Args:
        s: Text to scan.

    Returns:
        List of floats; empty when ``s`` contains no digits.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)(?:[eE][-+]?\d+)?", s)]

# Flatten each results entry into one row: the fields of the key first,
# then every number parsed from that entry's result strings.
formatted_data = [
    [*key, *(number for value in values for number in extract_numbers(value))]
    for key, values in results.items()
]

# Column labels: one per key field, followed by one per parsed number
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC',
                      'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout'] + additional_columns
df_gcn = pd.DataFrame(formatted_data, columns=columns)

# Show the table sorted by the AvgAcc column, highest first
df_gcn.sort_values('AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,AvgAcc,StdAcc,AvgAUC,StdAUC,MaxAcc,MaxAUC,TreatmentAcc,TreatmentAUC,ScannerAcc,ScannerAUC
12,3,3,2,2,0.5,81.65,1.9,59.59,0.73,83.54,60.32,0.771127,0.554608,0.877778,0.66875
9,3,2,2,4,0.5,81.33,0.95,59.39,1.27,82.28,60.66,0.767606,0.516616,0.9,0.68125
8,3,2,2,2,0.5,80.7,2.85,55.41,2.67,83.54,58.08,0.732394,0.523185,0.866667,0.6625
11,3,2,4,4,0.5,80.38,0.63,52.95,0.15,81.01,53.11,0.778169,0.590748,0.9,0.76875
3,2,2,4,4,0.5,79.75,0.0,54.83,2.28,79.75,57.12,0.786972,0.584125,0.877778,0.7125
14,3,3,4,2,0.5,79.43,2.85,56.5,1.71,82.28,58.21,0.762324,0.513418,0.922222,0.78125
13,3,3,2,4,0.5,79.11,1.9,61.53,4.42,81.01,65.95,0.77993,0.563923,0.877778,0.66875
15,3,3,4,4,0.5,78.8,3.48,55.46,1.91,82.28,57.37,0.774648,0.516896,0.877778,0.66875
7,2,3,4,4,0.5,78.48,2.53,61.02,1.13,81.01,62.16,0.767606,0.516616,0.888889,0.71875
6,2,3,4,2,0.5,78.16,4.11,54.54,1.74,82.28,56.28,0.742958,0.529582,0.9,0.68125


In [27]:
# GNN with one layer masking and L2 regularization
# NOTE: pickle.load can execute arbitrary code stored in the file; only load result files you trust.
with open('./modified_XGW-GAT/save_results_tuning_gcn_baseline_masking_l2.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return all numbers found in ``s`` as floats, in order of appearance.

    A number is an optional sign, then an integer or decimal (``7``, ``0.63``,
    ``.5``), then an optional exponent (``1e-05``). The alternation is grouped
    so the sign binds to integers too; ungrouped, ``-5`` parsed as ``5.0`` and
    ``1e-05`` as two separate numbers.

    Args:
        s: Text to scan.

    Returns:
        List of floats; empty when ``s`` contains no digits.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)(?:[eE][-+]?\d+)?", s)]

# Flatten each results entry into one row: the fields of the key first,
# then every number parsed from that entry's result strings.
formatted_data = [
    [*key, *(number for value in values for number in extract_numbers(value))]
    for key, values in results.items()
]

# Column labels: one per key field, followed by one per parsed number
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC',
                      'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout'] + additional_columns
df_gcn = pd.DataFrame(formatted_data, columns=columns)

# Show the table sorted by the AvgAcc column, highest first
df_gcn.sort_values('AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,AvgAcc,StdAcc,AvgAUC,StdAUC,MaxAcc,MaxAUC,TreatmentAcc,TreatmentAUC,ScannerAcc,ScannerAUC
7,2,3,4,4,0.5,81.65,0.63,59.54,0.4,82.28,59.94,0.760563,0.548211,0.844444,0.65
8,3,2,2,2,0.5,81.33,1.58,56.48,2.62,82.91,59.1,0.746479,0.515776,0.9,0.68125
9,3,2,2,4,0.5,81.01,1.27,57.26,2.66,82.28,59.92,0.774648,0.52088,0.888889,0.675
6,2,3,4,2,0.5,80.7,0.32,54.23,3.67,81.01,57.89,0.707746,0.50826,0.877778,0.66875
12,3,3,2,2,0.5,80.38,1.27,59.16,2.63,81.65,61.79,0.748239,0.560671,0.866667,0.70625
3,2,2,4,4,0.5,79.75,0.63,55.97,1.03,80.38,57.0,0.792254,0.587323,0.866667,0.70625
2,2,2,4,2,0.5,79.43,0.32,58.11,3.54,79.75,61.64,0.732394,0.586935,0.855556,0.7
15,3,3,4,4,0.5,79.43,2.85,56.05,2.5,82.28,58.55,0.772887,0.507861,0.866667,0.6625
5,2,3,2,4,0.5,79.11,2.53,58.05,1.89,81.65,59.94,0.741197,0.592266,0.866667,0.75
11,3,2,4,4,0.5,79.11,0.0,53.65,0.86,79.11,54.51,0.785211,0.598996,0.9,0.76875


In [28]:
# GNN with one layer masking and ElasticNet regularization
# NOTE: pickle.load can execute arbitrary code stored in the file; only load result files you trust.
with open('./modified_XGW-GAT/save_results_tuning_gcn_baseline_masking_elastic.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return all numbers found in ``s`` as floats, in order of appearance.

    A number is an optional sign, then an integer or decimal (``7``, ``0.63``,
    ``.5``), then an optional exponent (``1e-05``). The alternation is grouped
    so the sign binds to integers too; ungrouped, ``-5`` parsed as ``5.0`` and
    ``1e-05`` as two separate numbers.

    Args:
        s: Text to scan.

    Returns:
        List of floats; empty when ``s`` contains no digits.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)(?:[eE][-+]?\d+)?", s)]

# Flatten each results entry into one row: the fields of the key first,
# then every number parsed from that entry's result strings.
formatted_data = [
    [*key, *(number for value in values for number in extract_numbers(value))]
    for key, values in results.items()
]

# Column labels: one per key field, followed by one per parsed number
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC',
                      'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout'] + additional_columns
df_gcn = pd.DataFrame(formatted_data, columns=columns)

# Show the table sorted by the AvgAcc column, highest first
df_gcn.sort_values('AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,AvgAcc,StdAcc,AvgAUC,StdAUC,MaxAcc,MaxAUC,TreatmentAcc,TreatmentAUC,ScannerAcc,ScannerAUC
9,3,2,2,4,0.5,81.01,1.9,58.42,3.72,82.91,62.14,0.767606,0.508647,0.9,0.68125
11,3,2,4,4,0.5,80.7,1.58,54.25,1.1,82.28,55.35,0.77993,0.591814,0.911111,0.775
12,3,3,2,2,0.5,80.7,0.32,59.04,0.8,81.01,59.84,0.757042,0.53811,0.866667,0.6625
3,2,2,4,4,0.5,80.38,0.63,56.09,1.02,81.01,57.12,0.797535,0.586537,0.855556,0.65625
7,2,3,4,4,0.5,80.38,3.8,59.58,1.93,84.18,61.51,0.795775,0.529689,0.877778,0.66875
8,3,2,2,2,0.5,79.75,3.16,55.52,2.19,82.91,57.71,0.679577,0.558937,0.822222,0.6375
14,3,3,4,2,0.5,79.75,0.63,56.79,1.64,80.38,58.43,0.776408,0.513978,0.922222,0.7375
13,3,3,2,4,0.5,79.11,0.0,61.0,1.9,79.11,62.9,0.758803,0.571052,0.866667,0.6625
15,3,3,4,4,0.5,78.8,2.22,56.59,0.22,81.01,56.81,0.783451,0.514258,0.877778,0.7125
2,2,2,4,2,0.5,77.85,0.0,59.16,1.55,77.85,60.71,0.730634,0.569932,0.855556,0.65625


In [None]:
# GNN with one layer masking and K-support norm regularization
# NOTE: pickle.load can execute arbitrary code stored in the file; only load result files you trust.
with open('./modified_XGW-GAT/save_results_tuning_gcn_baseline_masking_ksupport.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return all numbers found in ``s`` as floats, in order of appearance.

    A number is an optional sign, then an integer or decimal (``7``, ``0.63``,
    ``.5``), then an optional exponent (``1e-05``). The alternation is grouped
    so the sign binds to integers too; ungrouped, ``-5`` parsed as ``5.0`` and
    ``1e-05`` as two separate numbers.

    Args:
        s: Text to scan.

    Returns:
        List of floats; empty when ``s`` contains no digits.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)(?:[eE][-+]?\d+)?", s)]

# Flatten each results entry into one row: the fields of the key first,
# then every number parsed from that entry's result strings.
formatted_data = [
    [*key, *(number for value in values for number in extract_numbers(value))]
    for key, values in results.items()
]

# Column labels: one per key field, followed by one per parsed number
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC',
                      'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout'] + additional_columns
df_gcn = pd.DataFrame(formatted_data, columns=columns)

# Show the table sorted by the AvgAcc column, highest first
df_gcn.sort_values('AvgAcc', ascending=False)

In [24]:
# GNN with one layer masking and Frobenius norm regularization
# NOTE: pickle.load can execute arbitrary code stored in the file; only load result files you trust.
with open('./modified_XGW-GAT/save_results_tuning_gcn_baseline_masking_frobenius.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return all numbers found in ``s`` as floats, in order of appearance.

    A number is an optional sign, then an integer or decimal (``7``, ``0.63``,
    ``.5``), then an optional exponent (``1e-05``). The alternation is grouped
    so the sign binds to integers too; ungrouped, ``-5`` parsed as ``5.0`` and
    ``1e-05`` as two separate numbers.

    Args:
        s: Text to scan.

    Returns:
        List of floats; empty when ``s`` contains no digits.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)(?:[eE][-+]?\d+)?", s)]

# Flatten each results entry into one row: the fields of the key first,
# then every number parsed from that entry's result strings.
formatted_data = [
    [*key, *(number for value in values for number in extract_numbers(value))]
    for key, values in results.items()
]

# Column labels: one per key field, followed by one per parsed number
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC',
                      'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout'] + additional_columns
df_gcn = pd.DataFrame(formatted_data, columns=columns)

# Show the table sorted by the AvgAcc column, highest first
df_gcn.sort_values('AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,AvgAcc,StdAcc,AvgAUC,StdAUC,MaxAcc,MaxAUC,TreatmentAcc,TreatmentAUC,ScannerAcc,ScannerAUC
8,3,2,2,2,0.5,82.59,2.22,55.76,1.9,84.81,57.67,0.772887,0.511846,0.888889,0.63125
12,3,3,2,2,0.5,81.96,0.32,60.04,1.05,82.28,61.1,0.723592,0.557699,0.855556,0.74375
14,3,3,4,2,0.5,81.96,2.22,60.85,3.51,84.18,64.37,0.790493,0.530475,0.911111,0.73125
5,2,3,2,4,0.5,80.7,0.32,56.04,3.15,81.01,59.19,0.78169,0.568973,0.888889,0.7625
9,3,2,2,4,0.5,80.7,0.32,55.34,2.73,81.01,58.07,0.765845,0.51555,0.888889,0.675
3,2,2,4,4,0.5,80.38,0.0,55.64,2.5,80.38,58.13,0.792254,0.579354,0.866667,0.70625
7,2,3,4,4,0.5,79.75,2.53,60.84,0.01,82.28,60.85,0.776408,0.5339,0.866667,0.6625
13,3,3,2,4,0.5,79.43,2.85,59.06,3.69,82.28,62.75,0.794014,0.548545,0.877778,0.66875
15,3,3,4,4,0.5,79.43,3.48,55.7,2.23,82.91,57.93,0.785211,0.535246,0.877778,0.66875
6,2,3,4,2,0.5,78.8,2.22,53.21,3.21,81.01,56.43,0.723592,0.521839,0.866667,0.6625


# GCN with VAE comparison: hyperparameter tuning

In [32]:
# GNN with one layer masking, VAE comparison run (file name: *_baseline_masking_vae.pkl)
# NOTE(review): this comment previously read "Frobenius norm regularization", identical to
# the comment on the *_frobenius.pkl cell — confirm which regularizer this run actually used.
# NOTE: pickle.load can execute arbitrary code stored in the file; only load result files you trust.
with open('./modified_XGW-GAT/save_results_tuning_gcn_baseline_masking_vae.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return all numbers found in ``s`` as floats, in order of appearance.

    A number is an optional sign, then an integer or decimal (``7``, ``0.63``,
    ``.5``), then an optional exponent (``1e-05``). The alternation is grouped
    so the sign binds to integers too; ungrouped, ``-5`` parsed as ``5.0`` and
    ``1e-05`` as two separate numbers.

    Args:
        s: Text to scan.

    Returns:
        List of floats; empty when ``s`` contains no digits.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)(?:[eE][-+]?\d+)?", s)]

# Flatten each results entry into one row: the fields of the key first,
# then every number parsed from that entry's result strings.
formatted_data = [
    [*key, *(number for value in values for number in extract_numbers(value))]
    for key, values in results.items()
]

# Column labels: one per key field, followed by one per parsed number
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC',
                      'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout'] + additional_columns
df_gcn = pd.DataFrame(formatted_data, columns=columns)

# Show the table sorted by the AvgAcc column, highest first
df_gcn.sort_values('AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,AvgAcc,StdAcc,AvgAUC,StdAUC,MaxAcc,MaxAUC,TreatmentAcc,TreatmentAUC,ScannerAcc,ScannerAUC
1,2,2,2,4,0.5,81.73,2.24,58.55,1.68,83.97,60.23,0.786972,0.556234,0.897727,0.647679
14,3,3,4,2,0.5,81.73,1.6,59.32,1.85,83.33,61.17,0.788732,0.513472,0.886364,0.690577
5,2,3,2,4,0.5,80.77,0.0,58.36,0.16,80.77,58.52,0.764085,0.53839,0.897727,0.746132
11,3,2,4,4,0.5,80.77,1.28,53.35,0.44,82.05,53.79,0.744718,0.55057,0.863636,0.628692
6,2,3,4,2,0.5,80.13,0.64,54.79,1.57,80.77,56.36,0.732394,0.523185,0.840909,0.66526
15,3,3,4,4,0.5,79.81,1.6,54.71,1.73,81.41,56.44,0.762324,0.525371,0.875,0.635021
9,3,2,2,4,0.5,79.49,1.28,59.39,2.73,80.77,62.12,0.742958,0.533566,0.886364,0.64135
10,3,2,4,2,0.5,79.49,1.92,54.68,2.79,81.41,57.47,0.769366,0.505729,0.897727,0.647679
3,2,2,4,4,0.5,78.85,2.56,57.72,2.61,81.41,60.33,0.762324,0.553262,0.909091,0.752461
7,2,3,4,4,0.5,77.24,3.53,54.84,1.06,80.77,55.9,0.753521,0.516056,0.897727,0.647679


In [None]:
# GNN with one layer masking and Frobenius norm regularization
# Reads the *_vae_weights.pkl results; the key fields are labelled further down with
# two extra columns, loss_lambda and weights_lambda.
# NOTE: pickle.load can execute arbitrary code stored in the file; only load result files you trust.
with open('./modified_XGW-GAT/save_results_tuning_gcn_baseline_masking_vae_weights.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return all numbers found in ``s`` as floats, in order of appearance.

    A number is an optional sign, then an integer or decimal (``7``, ``0.63``,
    ``.5``), then an optional exponent (``1e-05``). The alternation is grouped
    so the sign binds to integers too; ungrouped, ``-5`` parsed as ``5.0`` and
    ``1e-05`` as two separate numbers.

    Args:
        s: Text to scan.

    Returns:
        List of floats; empty when ``s`` contains no digits.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)(?:[eE][-+]?\d+)?", s)]

# Flatten each results entry into one row: the fields of the key first,
# then every number parsed from that entry's result strings.
formatted_data = [
    [*key, *(number for value in values for number in extract_numbers(value))]
    for key, values in results.items()
]

# Column labels: one per key field, followed by one per parsed number
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC',
                      'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout',
           'loss_lambda', 'weights_lambda'] + additional_columns
df_gcn = pd.DataFrame(formatted_data, columns=columns)

# Show the table sorted by the AvgAcc column, highest first
df_gcn.sort_values('AvgAcc', ascending=False)

In [36]:
# GNN with one layer masking and Frobenius norm regularization
# Reads the *_vae_weights.pkl results; the key fields are labelled further down with
# two extra columns, loss_lambda and weights_lambda.
# NOTE(review): other cells in this notebook read this same path but display different
# tables, so the file was presumably overwritten between runs — confirm which run this
# cell's output belongs to.
# NOTE: pickle.load can execute arbitrary code stored in the file; only load result files you trust.
with open('./modified_XGW-GAT/save_results_tuning_gcn_baseline_masking_vae_weights.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return all numbers found in ``s`` as floats, in order of appearance.

    A number is an optional sign, then an integer or decimal (``7``, ``0.63``,
    ``.5``), then an optional exponent (``1e-05``). The alternation is grouped
    so the sign binds to integers too; ungrouped, ``-5`` parsed as ``5.0`` and
    ``1e-05`` as two separate numbers.

    Args:
        s: Text to scan.

    Returns:
        List of floats; empty when ``s`` contains no digits.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)(?:[eE][-+]?\d+)?", s)]

# Flatten each results entry into one row: the fields of the key first,
# then every number parsed from that entry's result strings.
formatted_data = [
    [*key, *(number for value in values for number in extract_numbers(value))]
    for key, values in results.items()
]

# Column labels: one per key field, followed by one per parsed number
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC',
                      'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout',
           'loss_lambda', 'weights_lambda'] + additional_columns
df_gcn = pd.DataFrame(formatted_data, columns=columns)

# Show the table sorted by the AvgAcc column, highest first
df_gcn.sort_values('AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,loss_lambda,weights_lambda,AvgAcc,StdAcc,AvgAUC,StdAUC,MaxAcc,MaxAUC,TreatmentAcc,TreatmentAUC,ScannerAcc,ScannerAUC
6,2,2,2,4,0.5,10.0,0.1,81.73,0.32,59.15,3.28,82.05,62.44,0.739437,0.571278,0.920455,0.75879
0,2,2,2,4,0.5,0.1,0.1,81.09,0.32,56.26,1.7,81.41,57.95,0.774648,0.52088,0.875,0.684248
5,2,2,2,4,0.5,1.0,10.0,80.77,3.21,62.36,3.46,83.97,65.82,0.758803,0.567067,0.897727,0.746132
7,2,2,2,4,0.5,10.0,1.0,79.49,1.28,60.53,8.6,80.77,69.13,0.698944,0.53082,0.863636,0.677918
8,2,2,2,4,0.5,10.0,10.0,79.49,2.56,56.69,0.44,82.05,57.13,0.742958,0.529582,0.875,0.684248
2,2,2,2,4,0.5,0.1,10.0,78.53,0.32,58.91,1.88,78.85,60.8,0.735915,0.545239,0.840909,0.714487
1,2,2,2,4,0.5,0.1,1.0,78.21,1.28,55.69,3.42,79.49,59.1,0.725352,0.518921,0.806818,0.646273
4,2,2,2,4,0.5,1.0,1.0,76.28,1.28,58.13,3.96,77.56,62.1,0.748239,0.556686,0.829545,0.708158
3,2,2,2,4,0.5,1.0,0.1,75.64,1.28,55.14,2.87,76.92,58.01,0.758803,0.586989,0.840909,0.616034


In [37]:
# GNN with one layer masking and Frobenius norm regularization
# Reads the *_vae_weights.pkl results; the key fields are labelled further down with
# two extra columns, loss_lambda and weights_lambda.
# NOTE(review): other cells in this notebook read this same path but display different
# tables, so the file was presumably overwritten between runs — confirm which run this
# cell's output belongs to.
# NOTE: pickle.load can execute arbitrary code stored in the file; only load result files you trust.
with open('./modified_XGW-GAT/save_results_tuning_gcn_baseline_masking_vae_weights.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return all numbers found in ``s`` as floats, in order of appearance.

    A number is an optional sign, then an integer or decimal (``7``, ``0.63``,
    ``.5``), then an optional exponent (``1e-05``). The alternation is grouped
    so the sign binds to integers too; ungrouped, ``-5`` parsed as ``5.0`` and
    ``1e-05`` as two separate numbers.

    Args:
        s: Text to scan.

    Returns:
        List of floats; empty when ``s`` contains no digits.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)(?:[eE][-+]?\d+)?", s)]

# Flatten each results entry into one row: the fields of the key first,
# then every number parsed from that entry's result strings.
formatted_data = [
    [*key, *(number for value in values for number in extract_numbers(value))]
    for key, values in results.items()
]

# Column labels: one per key field, followed by one per parsed number
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC',
                      'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout',
           'loss_lambda', 'weights_lambda'] + additional_columns
df_gcn = pd.DataFrame(formatted_data, columns=columns)

# Show the table sorted by the AvgAcc column, highest first
df_gcn.sort_values('AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,loss_lambda,weights_lambda,AvgAcc,StdAcc,AvgAUC,StdAUC,MaxAcc,MaxAUC,TreatmentAcc,TreatmentAUC,ScannerAcc,ScannerAUC
8,3,3,4,2,0.5,10.0,10.0,81.41,1.92,56.75,0.64,83.33,57.39,0.755282,0.537044,0.875,0.7827
3,3,3,4,2,0.5,1.0,0.1,80.45,2.88,57.44,3.17,83.33,60.61,0.737676,0.510446,0.886364,0.690577
6,3,3,4,2,0.5,10.0,0.1,80.13,0.64,52.05,0.41,80.77,52.46,0.732394,0.49131,0.897727,0.696906
0,3,3,4,2,0.5,0.1,0.1,79.81,0.96,55.29,3.04,80.77,58.33,0.767606,0.548491,0.897727,0.647679
1,3,3,4,2,0.5,0.1,1.0,78.21,0.0,59.78,6.51,78.21,66.29,0.764085,0.57425,0.863636,0.727145
5,3,3,4,2,0.5,1.0,10.0,76.6,4.17,55.77,5.02,80.77,60.8,0.75,0.549784,0.818182,0.652602
7,3,3,4,2,0.5,10.0,1.0,74.04,0.32,53.21,0.39,74.36,53.6,0.43662,0.543301,0.556818,0.605485
2,3,3,4,2,0.5,0.1,10.0,54.49,26.92,54.78,2.32,81.41,57.09,0.751761,0.507021,0.875,0.635021
4,3,3,4,2,0.5,1.0,1.0,50.64,24.36,57.13,5.75,75.0,62.88,0.234155,0.52031,0.181818,0.544304


In [38]:
# GNN with one layer masking and Frobenius norm regularization
# Reads the *_vae_weights.pkl results; the key fields are labelled further down with
# two extra columns, loss_lambda and weights_lambda.
# NOTE(review): other cells in this notebook read this same path but display different
# tables, so the file was presumably overwritten between runs — confirm which run this
# cell's output belongs to.
# NOTE: pickle.load can execute arbitrary code stored in the file; only load result files you trust.
with open('./modified_XGW-GAT/save_results_tuning_gcn_baseline_masking_vae_weights.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return all numbers found in ``s`` as floats, in order of appearance.

    A number is an optional sign, then an integer or decimal (``7``, ``0.63``,
    ``.5``), then an optional exponent (``1e-05``). The alternation is grouped
    so the sign binds to integers too; ungrouped, ``-5`` parsed as ``5.0`` and
    ``1e-05`` as two separate numbers.

    Args:
        s: Text to scan.

    Returns:
        List of floats; empty when ``s`` contains no digits.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)(?:[eE][-+]?\d+)?", s)]

# Flatten each results entry into one row: the fields of the key first,
# then every number parsed from that entry's result strings.
formatted_data = [
    [*key, *(number for value in values for number in extract_numbers(value))]
    for key, values in results.items()
]

# Column labels: one per key field, followed by one per parsed number
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC',
                      'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout',
           'loss_lambda', 'weights_lambda'] + additional_columns
df_gcn = pd.DataFrame(formatted_data, columns=columns)

# Show the table sorted by the AvgAcc column, highest first
df_gcn.sort_values('AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,loss_lambda,weights_lambda,AvgAcc,StdAcc,AvgAUC,StdAUC,MaxAcc,MaxAUC,TreatmentAcc,TreatmentAUC,ScannerAcc,ScannerAUC
6,2,3,2,4,0.5,10.0,0.1,80.77,0.64,54.46,2.19,81.41,56.65,0.771127,0.562577,0.897727,0.696906
0,2,3,2,4,0.5,0.1,0.1,79.81,2.24,56.64,4.91,82.05,61.55,0.734155,0.488391,0.863636,0.677918
8,2,3,2,4,0.5,10.0,10.0,79.81,1.6,58.64,1.63,81.41,60.26,0.741197,0.548437,0.829545,0.609705
1,2,3,2,4,0.5,0.1,1.0,79.17,0.32,58.68,3.25,79.49,61.93,0.765845,0.611176,0.818182,0.701828
4,2,3,2,4,0.5,1.0,1.0,79.17,0.32,55.22,0.65,79.49,55.87,0.75,0.557752,0.840909,0.66526
7,2,3,2,4,0.5,10.0,1.0,78.85,0.0,54.8,2.52,78.85,57.32,0.769366,0.549557,0.886364,0.690577
2,2,3,2,4,0.5,0.1,10.0,77.24,2.24,56.74,0.68,79.49,57.42,0.713028,0.543333,0.772727,0.578059
3,2,3,2,4,0.5,1.0,0.1,76.92,4.49,58.93,0.92,81.41,59.85,0.757042,0.546079,0.840909,0.66526
5,2,3,2,4,0.5,1.0,10.0,74.68,0.96,55.83,2.98,75.64,58.81,0.716549,0.557419,0.829545,0.708158


# GCN with VAE comparison: norm comparison

In [39]:
# GNN with one layer masking and Frobenius norm regularization
# Reads the *_vae_weights.pkl results; the key fields are labelled further down with
# two extra columns, loss_lambda and weights_lambda.
# NOTE(review): other cells in this notebook read this same path but display different
# tables, so the file was presumably overwritten between runs — confirm which run this
# cell's output belongs to.
# NOTE: pickle.load can execute arbitrary code stored in the file; only load result files you trust.
with open('./modified_XGW-GAT/save_results_tuning_gcn_baseline_masking_vae_weights.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return all numbers found in ``s`` as floats, in order of appearance.

    A number is an optional sign, then an integer or decimal (``7``, ``0.63``,
    ``.5``), then an optional exponent (``1e-05``). The alternation is grouped
    so the sign binds to integers too; ungrouped, ``-5`` parsed as ``5.0`` and
    ``1e-05`` as two separate numbers.

    Args:
        s: Text to scan.

    Returns:
        List of floats; empty when ``s`` contains no digits.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)(?:[eE][-+]?\d+)?", s)]

# Flatten each results entry into one row: the fields of the key first,
# then every number parsed from that entry's result strings.
formatted_data = [
    [*key, *(number for value in values for number in extract_numbers(value))]
    for key, values in results.items()
]

# Column labels: one per key field, followed by one per parsed number
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC',
                      'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout',
           'loss_lambda', 'weights_lambda'] + additional_columns
df_gcn = pd.DataFrame(formatted_data, columns=columns)

# Show the table sorted by the AvgAcc column, highest first
df_gcn.sort_values('AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,loss_lambda,weights_lambda,AvgAcc,StdAcc,AvgAUC,StdAUC,MaxAcc,MaxAUC,TreatmentAcc,TreatmentAUC,ScannerAcc,ScannerAUC
7,2,2,2,4,0.5,10.0,1.0,82.91,0.63,58.67,0.07,83.54,58.74,0.702465,0.54889,0.844444,0.7375
8,2,2,2,4,0.5,10.0,10.0,82.59,3.48,60.96,4.25,86.08,65.21,0.174296,0.5,0.111111,0.5
2,2,2,2,4,0.5,0.1,10.0,82.28,1.9,57.74,1.41,84.18,59.14,0.174296,0.5,0.111111,0.5
1,2,2,2,4,0.5,0.1,1.0,79.43,0.32,59.71,2.98,79.75,62.69,0.755282,0.556966,0.833333,0.64375
3,2,2,2,4,0.5,1.0,0.1,79.43,1.58,55.6,4.1,81.01,59.7,0.735915,0.529302,0.833333,0.6875
5,2,2,2,4,0.5,1.0,10.0,79.43,4.75,55.07,0.6,84.18,55.67,0.174296,0.5,0.111111,0.5
4,2,2,2,4,0.5,1.0,1.0,78.48,1.27,56.33,3.31,79.75,59.64,0.77993,0.544001,0.933333,0.83125
6,2,2,2,4,0.5,10.0,0.1,78.48,1.27,57.9,3.05,79.75,60.95,0.755282,0.564935,0.888889,0.71875
0,2,2,2,4,0.5,0.1,0.1,77.22,1.9,54.79,2.18,79.11,56.97,0.735915,0.53727,0.833333,0.64375


In [41]:
# GNN with one layer masking and L1 norm regularization
# NOTE: pickle.load can execute arbitrary code stored in the file; only load result files you trust.
with open('./modified_XGW-GAT/save_results_tuning_gcn_baseline_masking_vae_weights_l1.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return all numbers found in ``s`` as floats, in order of appearance.

    A number is an optional sign, then an integer or decimal (``7``, ``0.63``,
    ``.5``), then an optional exponent (``1e-05``). The alternation is grouped
    so the sign binds to integers too; ungrouped, ``-5`` parsed as ``5.0`` and
    ``1e-05`` as two separate numbers.

    Args:
        s: Text to scan.

    Returns:
        List of floats; empty when ``s`` contains no digits.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)(?:[eE][-+]?\d+)?", s)]

# Process the data
# Each row starts with the fields of the result key, followed by every number
# parsed (in order) from that key's result strings.
formatted_data = [
    list(params) + [num for text in metric_strings for num in extract_numbers(text)]
    for params, metric_strings in results.items()
]

# Creating DataFrame with appropriate column names
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC',
                      'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout',
           'loss_lambda', 'weights_lambda'] + additional_columns
df_gcn = pd.DataFrame(formatted_data, columns=columns)

# Display the DataFrame, highest AvgAcc first
df_gcn.sort_values('AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,loss_lambda,weights_lambda,AvgAcc,StdAcc,AvgAUC,StdAUC,MaxAcc,MaxAUC,TreatmentAcc,TreatmentAUC,ScannerAcc,ScannerAUC
0,2,2,2,4,0.5,10,1,80.38,1.9,57.81,0.99,82.28,58.8,0.176056,0.501066,0.111111,0.5


In [42]:
# GNN with one layer masking; per the file name this is the GCN baseline /
# masking / VAE weights run, 'l2' variant (TODO confirm against the run config).
# pickle.load can run arbitrary code while unpickling; only load trusted files.
with open('./modified_XGW-GAT/save_results_tuning_gcn_baseline_masking_vae_weights_l2.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return every number found in ``s`` as a list of floats, in order of appearance.

    Integers ("85") and decimals ("85.44", ".5") are recognised, each with an
    optional leading sign. The sign is grouped with both alternatives so that a
    signed integer such as "-5" keeps its sign, exactly like a signed decimal.
    Exponent notation is not recognised. Returns [] when nothing matches.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)", s)]

# Process the data
# Each row starts with the fields of the result key, followed by every number
# parsed (in order) from that key's result strings.
formatted_data = [
    list(params) + [num for text in metric_strings for num in extract_numbers(text)]
    for params, metric_strings in results.items()
]

# Creating DataFrame with appropriate column names
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC',
                      'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout',
           'loss_lambda', 'weights_lambda'] + additional_columns
df_gcn = pd.DataFrame(formatted_data, columns=columns)

# Display the DataFrame, highest AvgAcc first
df_gcn.sort_values('AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,loss_lambda,weights_lambda,AvgAcc,StdAcc,AvgAUC,StdAUC,MaxAcc,MaxAUC,TreatmentAcc,TreatmentAUC,ScannerAcc,ScannerAUC
0,2,2,2,4,0.5,10,1,78.8,0.32,57.28,3.54,79.11,60.82,0.725352,0.546811,0.822222,0.6375


In [46]:
# GNN with one layer masking; per the file name this is the GCN baseline /
# masking / VAE weights run, 'elastic' variant (TODO confirm against the run config).
# pickle.load can run arbitrary code while unpickling; only load trusted files.
with open('./modified_XGW-GAT/save_results_tuning_gcn_baseline_masking_vae_weights_elastic.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return every number found in ``s`` as a list of floats, in order of appearance.

    Integers ("85") and decimals ("85.44", ".5") are recognised, each with an
    optional leading sign. The sign is grouped with both alternatives so that a
    signed integer such as "-5" keeps its sign, exactly like a signed decimal.
    Exponent notation is not recognised. Returns [] when nothing matches.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)", s)]

# Process the data
# Each row starts with the fields of the result key, followed by every number
# parsed (in order) from that key's result strings.
formatted_data = [
    list(params) + [num for text in metric_strings for num in extract_numbers(text)]
    for params, metric_strings in results.items()
]

# Creating DataFrame with appropriate column names
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC',
                      'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout',
           'loss_lambda', 'weights_lambda', 'weights_elastic'] + additional_columns
df_gcn = pd.DataFrame(formatted_data, columns=columns)

# Display the DataFrame, highest AvgAcc first
df_gcn.sort_values('AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,loss_lambda,weights_lambda,weights_elastic,AvgAcc,StdAcc,AvgAUC,StdAUC,MaxAcc,MaxAUC,TreatmentAcc,TreatmentAUC,ScannerAcc,ScannerAUC
7,2,2,2,4,0.5,10,10.0,0.5,83.86,0.95,58.39,0.95,84.81,59.34,0.174296,0.5,0.111111,0.5
2,2,2,2,4,0.5,10,0.1,1.0,82.91,1.27,59.4,1.0,84.18,60.4,0.422535,0.570632,0.466667,0.65625
4,2,2,2,4,0.5,10,1.0,0.5,82.28,0.63,53.36,0.34,82.91,53.7,0.174296,0.5,0.111111,0.5
6,2,2,2,4,0.5,10,10.0,0.2,80.7,0.32,59.34,1.98,81.01,61.32,0.174296,0.5,0.111111,0.5
8,2,2,2,4,0.5,10,10.0,1.0,80.38,0.63,61.15,4.8,81.01,65.95,0.174296,0.5,0.111111,0.5
5,2,2,2,4,0.5,10,1.0,1.0,79.75,0.63,55.49,0.27,80.38,55.76,0.174296,0.5,0.111111,0.5
0,2,2,2,4,0.5,10,0.1,0.2,79.11,0.63,59.09,1.42,79.75,60.51,0.667254,0.559443,0.722222,0.625
1,2,2,2,4,0.5,10,0.1,0.5,79.11,1.27,57.55,1.19,80.38,58.74,0.700704,0.563761,0.822222,0.6375
3,2,2,2,4,0.5,10,1.0,0.2,78.48,1.9,55.8,1.39,80.38,57.18,0.174296,0.5,0.111111,0.5


In [43]:
# GNN with one layer masking; per the file name this is the GCN baseline /
# masking / VAE weights run, 'force_0_1' variant (TODO confirm against the run config).
# pickle.load can run arbitrary code while unpickling; only load trusted files.
with open('./modified_XGW-GAT/save_results_tuning_gcn_baseline_masking_vae_weights_force_0_1.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return every number found in ``s`` as a list of floats, in order of appearance.

    Integers ("85") and decimals ("85.44", ".5") are recognised, each with an
    optional leading sign. The sign is grouped with both alternatives so that a
    signed integer such as "-5" keeps its sign, exactly like a signed decimal.
    Exponent notation is not recognised. Returns [] when nothing matches.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)", s)]

# Process the data
# Each row starts with the fields of the result key, followed by every number
# parsed (in order) from that key's result strings.
formatted_data = [
    list(params) + [num for text in metric_strings for num in extract_numbers(text)]
    for params, metric_strings in results.items()
]

# Creating DataFrame with appropriate column names
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC',
                      'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout',
           'loss_lambda', 'weights_lambda'] + additional_columns
df_gcn = pd.DataFrame(formatted_data, columns=columns)

# Display the DataFrame, highest AvgAcc first
df_gcn.sort_values('AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,loss_lambda,weights_lambda,AvgAcc,StdAcc,AvgAUC,StdAUC,MaxAcc,MaxAUC,TreatmentAcc,TreatmentAUC,ScannerAcc,ScannerAUC
0,2,2,2,4,0.5,10,1,78.16,1.58,54.64,0.68,79.75,55.32,0.739437,0.559325,0.866667,0.6625


# GCN VAE NULLING OUT

In [111]:
# GNN with one layer masking; per the file name this is the GCN baseline /
# masking / VAE weights run, 'elastic_nullingout' variant (TODO confirm against the run config).
# pickle.load can run arbitrary code while unpickling; only load trusted files.
with open('./modified_XGW-GAT/save_results_tuning_gcn_baseline_masking_vae_weights_elastic_nullingout.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return every number found in ``s`` as a list of floats, in order of appearance.

    Integers ("85") and decimals ("85.44", ".5") are recognised, each with an
    optional leading sign. The sign is grouped with both alternatives so that a
    signed integer such as "-5" keeps its sign, exactly like a signed decimal.
    Exponent notation is not recognised. Returns [] when nothing matches.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)", s)]

# Process the data
# The 'perc ...' labels are stripped before parsing so their digits are not read
# as metrics. Order matters: 'perc 0.95' goes before 'perc 0.9', and 'perc 0'
# last, because each shorter label is a prefix of the longer ones.
perc_labels = ['perc 0.7', 'perc 0.8', 'perc 0.95', 'perc 0.9', 'perc 0']
formatted_data = []
for params, metric_strings in results.items():
    parsed = []
    for text in metric_strings:
        for label in perc_labels:
            text = text.replace(label, "")
        parsed += extract_numbers(text)
    formatted_data.append(list(params) + parsed)

# Creating DataFrame with appropriate column names
perc_tags = ['Perc0', 'Perc07', 'Perc08', 'Perc095', 'Perc09']
additional_columns = (
    [f'{tag}_{metric}' for tag in perc_tags
     for metric in ('AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC')]
    + ['MaxAvgAcc', 'MaxAvgAUC']
    + [f'{tag}_{metric}' for tag in perc_tags
       for metric in ('TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC')]
)
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout',
           'loss_lambda', 'weights_lambda', 'weights_elastic'] + additional_columns
df = pd.DataFrame(formatted_data, columns=columns)

# Display the DataFrame
df

50
50


Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,loss_lambda,weights_lambda,weights_elastic,Perc0_AvgAcc,Perc0_StdAcc,...,Perc08_ScannerAcc,Perc08_ScannerAUC,Perc095_TreatmentAcc,Perc095_TreatmentAUC,Perc095_ScannerAcc,Perc095_ScannerAUC,Perc09_TreatmentAcc,Perc09_TreatmentAUC,Perc09_ScannerAcc,Perc09_ScannerAUC
0,2,2,2,4,0.5,10,10,0.5,80.06,2.85,...,0.888889,0.5,0.826011,0.5,0.888889,0.5,0.826011,0.5,0.888889,0.5


In [112]:
# AvgAcc for each Perc* setting, side by side
df.loc[:, ['Perc0_AvgAcc', 'Perc07_AvgAcc', 'Perc08_AvgAcc', 'Perc095_AvgAcc', 'Perc09_AvgAcc']]

Unnamed: 0,Perc0_AvgAcc,Perc07_AvgAcc,Perc08_AvgAcc,Perc095_AvgAcc,Perc09_AvgAcc
0,80.06,85.76,85.13,85.44,85.13


In [113]:
# TreatmentAcc for each Perc* setting, side by side
df.loc[:, ['Perc0_TreatmentAcc', 'Perc07_TreatmentAcc', 'Perc08_TreatmentAcc', 'Perc095_TreatmentAcc', 'Perc09_TreatmentAcc']]

Unnamed: 0,Perc0_TreatmentAcc,Perc07_TreatmentAcc,Perc08_TreatmentAcc,Perc095_TreatmentAcc,Perc09_TreatmentAcc
0,0.71529,0.824253,0.826011,0.826011,0.826011


In [114]:
# ScannerAcc for each Perc* setting, side by side
df.loc[:, ['Perc0_ScannerAcc', 'Perc07_ScannerAcc', 'Perc08_ScannerAcc', 'Perc095_ScannerAcc', 'Perc09_ScannerAcc']]

Unnamed: 0,Perc0_ScannerAcc,Perc07_ScannerAcc,Perc08_ScannerAcc,Perc095_ScannerAcc,Perc09_ScannerAcc
0,0.877778,0.888889,0.888889,0.888889,0.888889


In [110]:
# GNN with one layer masking; per the file name this is the GCN baseline /
# masking / MAE run, 'frobenius_nullingout' variant (TODO confirm against the run config).
# pickle.load can run arbitrary code while unpickling; only load trusted files.
with open('./modified_XGW-GAT/save_results_tuning_gcn_baseline_masking_mae_frobenius_nullingout.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return every number found in ``s`` as a list of floats, in order of appearance.

    Integers ("85") and decimals ("85.44", ".5") are recognised, each with an
    optional leading sign. The sign is grouped with both alternatives so that a
    signed integer such as "-5" keeps its sign, exactly like a signed decimal.
    Exponent notation is not recognised. Returns [] when nothing matches.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)", s)]

# Process the data
# The 'perc ...' labels are stripped before parsing so their digits are not read
# as metrics. Order matters: 'perc 0.95' goes before 'perc 0.9', and 'perc 0'
# last, because each shorter label is a prefix of the longer ones.
perc_labels = ['perc 0.7', 'perc 0.8', 'perc 0.95', 'perc 0.9', 'perc 0']
formatted_data = []
for params, metric_strings in results.items():
    parsed = []
    for text in metric_strings:
        for label in perc_labels:
            text = text.replace(label, "")
        parsed += extract_numbers(text)
    formatted_data.append(list(params) + parsed)

# Creating DataFrame with appropriate column names
perc_tags = ['Perc0', 'Perc07', 'Perc08', 'Perc095', 'Perc09']
additional_columns = (
    [f'{tag}_{metric}' for tag in perc_tags
     for metric in ('AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC')]
    + ['MaxAvgAcc', 'MaxAvgAUC']
    + [f'{tag}_{metric}' for tag in perc_tags
       for metric in ('TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC')]
)
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout',
           'loss_lambda', 'weights_lambda', 'weights_elastic'] + additional_columns
df = pd.DataFrame(formatted_data, columns=columns)

# Display the DataFrame
df

50
50


Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,loss_lambda,weights_lambda,weights_elastic,Perc0_AvgAcc,Perc0_StdAcc,...,Perc08_ScannerAcc,Perc08_ScannerAUC,Perc095_TreatmentAcc,Perc095_TreatmentAUC,Perc095_ScannerAcc,Perc095_ScannerAUC,Perc09_TreatmentAcc,Perc09_TreatmentAUC,Perc09_ScannerAcc,Perc09_ScannerAUC
0,2,2,2,4,0.5,10,10,0.5,81.01,0.63,...,0.877778,0.5375,0.826011,0.503987,0.888889,0.5,0.817223,0.494681,0.888889,0.5


In [91]:
# AvgAcc for each Perc* setting, side by side
df.loc[:, ['Perc0_AvgAcc', 'Perc07_AvgAcc', 'Perc08_AvgAcc', 'Perc095_AvgAcc', 'Perc09_AvgAcc']]

Unnamed: 0,Perc0_AvgAcc,Perc07_AvgAcc,Perc08_AvgAcc,Perc095_AvgAcc,Perc09_AvgAcc
0,81.01,81.33,81.01,81.96,79.43


In [100]:
# TreatmentAcc for each Perc* setting, side by side
df.loc[:, ['Perc0_TreatmentAcc', 'Perc07_TreatmentAcc', 'Perc08_TreatmentAcc', 'Perc095_TreatmentAcc', 'Perc09_TreatmentAcc']]

Unnamed: 0,Perc0_TreatmentAcc,Perc07_TreatmentAcc,Perc08_TreatmentAcc,Perc095_TreatmentAcc,Perc09_TreatmentAcc
0,0.776801,0.822496,0.824253,0.826011,0.817223


In [101]:
# ScannerAcc for each Perc* setting, side by side
df.loc[:, ['Perc0_ScannerAcc', 'Perc07_ScannerAcc', 'Perc08_ScannerAcc', 'Perc095_ScannerAcc', 'Perc09_ScannerAcc']]

Unnamed: 0,Perc0_ScannerAcc,Perc07_ScannerAcc,Perc08_ScannerAcc,Perc095_ScannerAcc,Perc09_ScannerAcc
0,0.922222,0.877778,0.877778,0.888889,0.888889


In [102]:
# GNN with one layer masking; per the file name this is the GCN baseline /
# masking run, 'frobenius_nullingout' variant (TODO confirm against the run config).
# pickle.load can run arbitrary code while unpickling; only load trusted files.
with open('./modified_XGW-GAT/save_results_tuning_gcn_baseline_masking_frobenius_nullingout.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return every number found in ``s`` as a list of floats, in order of appearance.

    Integers ("85") and decimals ("85.44", ".5") are recognised, each with an
    optional leading sign. The sign is grouped with both alternatives so that a
    signed integer such as "-5" keeps its sign, exactly like a signed decimal.
    Exponent notation is not recognised. Returns [] when nothing matches.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)", s)]

# Process the data
# The 'perc ...' labels are stripped before parsing so their digits are not read
# as metrics. Order matters: 'perc 0.95' goes before 'perc 0.9', and 'perc 0'
# last, because each shorter label is a prefix of the longer ones.
perc_labels = ['perc 0.7', 'perc 0.8', 'perc 0.95', 'perc 0.9', 'perc 0']
formatted_data = []
for params, metric_strings in results.items():
    parsed = []
    for text in metric_strings:
        for label in perc_labels:
            text = text.replace(label, "")
        parsed += extract_numbers(text)
    formatted_data.append(list(params) + parsed)

# Creating DataFrame with appropriate column names
perc_tags = ['Perc0', 'Perc07', 'Perc08', 'Perc095', 'Perc09']
additional_columns = (
    [f'{tag}_{metric}' for tag in perc_tags
     for metric in ('AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC')]
    + ['MaxAvgAcc', 'MaxAvgAUC']
    + [f'{tag}_{metric}' for tag in perc_tags
       for metric in ('TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC')]
)
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout',
           'loss_lambda', 'weights_lambda', 'weights_elastic'] + additional_columns
df = pd.DataFrame(formatted_data, columns=columns)

# Display the DataFrame
df

50
50


Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,loss_lambda,weights_lambda,weights_elastic,Perc0_AvgAcc,Perc0_StdAcc,...,Perc08_ScannerAcc,Perc08_ScannerAUC,Perc095_TreatmentAcc,Perc095_TreatmentAUC,Perc095_ScannerAcc,Perc095_ScannerAUC,Perc09_TreatmentAcc,Perc09_TreatmentAUC,Perc09_ScannerAcc,Perc09_ScannerAUC
0,2,2,2,4,0.5,10,10,0.5,78.16,1.58,...,0.855556,0.525,0.804921,0.503181,0.833333,0.46875,0.811951,0.507436,0.866667,0.575


In [103]:
# AvgAcc for each Perc* setting, side by side
df.loc[:, ['Perc0_AvgAcc', 'Perc07_AvgAcc', 'Perc08_AvgAcc', 'Perc095_AvgAcc', 'Perc09_AvgAcc']]

Unnamed: 0,Perc0_AvgAcc,Perc07_AvgAcc,Perc08_AvgAcc,Perc095_AvgAcc,Perc09_AvgAcc
0,78.16,82.91,83.86,83.86,83.86


In [104]:
# TreatmentAcc for each Perc* setting, side by side
df.loc[:, ['Perc0_TreatmentAcc', 'Perc07_TreatmentAcc', 'Perc08_TreatmentAcc', 'Perc095_TreatmentAcc', 'Perc09_TreatmentAcc']]

Unnamed: 0,Perc0_TreatmentAcc,Perc07_TreatmentAcc,Perc08_TreatmentAcc,Perc095_TreatmentAcc,Perc09_TreatmentAcc
0,0.764499,0.804921,0.808436,0.804921,0.811951


In [105]:
# ScannerAcc for each Perc* setting, side by side
df.loc[:, ['Perc0_ScannerAcc', 'Perc07_ScannerAcc', 'Perc08_ScannerAcc', 'Perc095_ScannerAcc', 'Perc09_ScannerAcc']]

Unnamed: 0,Perc0_ScannerAcc,Perc07_ScannerAcc,Perc08_ScannerAcc,Perc095_ScannerAcc,Perc09_ScannerAcc
0,0.888889,0.866667,0.855556,0.833333,0.866667


# SpaRef Pipeline: GCN

In [116]:
# GNN with one layer masking; per the file name this is the SpaRef GCN baseline /
# masking / MAE weights run, 'frobenius_nullingout' variant (TODO confirm against the run config).
# pickle.load can run arbitrary code while unpickling; only load trusted files.
with open('./modified_XGW-GAT_sparef/save_results_tuning_sparef_gcn_baseline_masking_mae_weights_frobenius_nullingout.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return every number found in ``s`` as a list of floats, in order of appearance.

    Integers ("85") and decimals ("85.44", ".5") are recognised, each with an
    optional leading sign. The sign is grouped with both alternatives so that a
    signed integer such as "-5" keeps its sign, exactly like a signed decimal.
    Exponent notation is not recognised. Returns [] when nothing matches.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)", s)]

# Process the data
# The 'perc ...' labels are stripped before parsing so their digits are not read
# as metrics. Order matters: 'perc 0.95' goes before 'perc 0.9', and 'perc 0'
# last, because each shorter label is a prefix of the longer ones.
perc_labels = ['perc 0.7', 'perc 0.8', 'perc 0.95', 'perc 0.9', 'perc 0']
formatted_data = []
for params, metric_strings in results.items():
    parsed = []
    for text in metric_strings:
        for label in perc_labels:
            text = text.replace(label, "")
        parsed += extract_numbers(text)
    formatted_data.append(list(params) + parsed)

# Creating DataFrame with appropriate column names
perc_tags = ['Perc0', 'Perc07', 'Perc08', 'Perc095', 'Perc09']
additional_columns = (
    [f'{tag}_{metric}' for tag in perc_tags
     for metric in ('AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC')]
    + ['MaxAvgAcc', 'MaxAvgAUC']
    + [f'{tag}_{metric}' for tag in perc_tags
       for metric in ('ScannerAcc', 'ScannerAUC')]
)
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout',
           'loss_lambda', 'weights_lambda', 'weights_elastic'] + additional_columns
df = pd.DataFrame(formatted_data, columns=columns)

# Display the DataFrame
df

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,loss_lambda,weights_lambda,weights_elastic,Perc0_AvgAcc,Perc0_StdAcc,...,Perc0_ScannerAcc,Perc0_ScannerAUC,Perc07_ScannerAcc,Perc07_ScannerAUC,Perc08_ScannerAcc,Perc08_ScannerAUC,Perc095_ScannerAcc,Perc095_ScannerAUC,Perc09_ScannerAcc,Perc09_ScannerAUC
0,2,2,2,4,0.5,1,0.01,0.5,75.45,1.41,...,0.854015,0.69249,0.875912,0.668775,0.832117,0.587549,0.832117,0.495652,0.846715,0.559486


In [118]:
# AvgAcc for each Perc* setting, side by side
df.loc[:, ['Perc0_AvgAcc', 'Perc07_AvgAcc', 'Perc08_AvgAcc', 'Perc095_AvgAcc', 'Perc09_AvgAcc']]

Unnamed: 0,Perc0_AvgAcc,Perc07_AvgAcc,Perc08_AvgAcc,Perc095_AvgAcc,Perc09_AvgAcc
0,75.45,79.72,81.1,83.31,82.35


In [119]:
# ScannerAcc for each Perc* setting, side by side
df.loc[:, ['Perc0_ScannerAcc', 'Perc07_ScannerAcc', 'Perc08_ScannerAcc', 'Perc095_ScannerAcc', 'Perc09_ScannerAcc']]

Unnamed: 0,Perc0_ScannerAcc,Perc07_ScannerAcc,Perc08_ScannerAcc,Perc095_ScannerAcc,Perc09_ScannerAcc
0,0.854015,0.875912,0.832117,0.832117,0.846715


In [121]:
# GNN with one layer masking; per the file name this is the SpaRef GCN baseline /
# masking / VAE weights run, 'elastic_nullingout' variant (TODO confirm against the run config).
# pickle.load can run arbitrary code while unpickling; only load trusted files.
with open('./modified_XGW-GAT_sparef/save_results_tuning_sparef_gcn_baseline_masking_vae_weights_elastic_nullingout.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return every number found in ``s`` as a list of floats, in order of appearance.

    Integers ("85") and decimals ("85.44", ".5") are recognised, each with an
    optional leading sign. The sign is grouped with both alternatives so that a
    signed integer such as "-5" keeps its sign, exactly like a signed decimal.
    Exponent notation is not recognised. Returns [] when nothing matches.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)", s)]

# Process the data
# The 'perc ...' labels are stripped before parsing so their digits are not read
# as metrics. Order matters: 'perc 0.95' goes before 'perc 0.9', and 'perc 0'
# last, because each shorter label is a prefix of the longer ones.
perc_labels = ['perc 0.7', 'perc 0.8', 'perc 0.95', 'perc 0.9', 'perc 0']
formatted_data = []
for params, metric_strings in results.items():
    parsed = []
    for text in metric_strings:
        for label in perc_labels:
            text = text.replace(label, "")
        parsed += extract_numbers(text)
    formatted_data.append(list(params) + parsed)

# Creating DataFrame with appropriate column names
perc_tags = ['Perc0', 'Perc07', 'Perc08', 'Perc095', 'Perc09']
additional_columns = (
    [f'{tag}_{metric}' for tag in perc_tags
     for metric in ('AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC')]
    + ['MaxAvgAcc', 'MaxAvgAUC']
    + [f'{tag}_{metric}' for tag in perc_tags
       for metric in ('ScannerAcc', 'ScannerAUC')]
)
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout',
           'loss_lambda', 'weights_lambda', 'weights_elastic'] + additional_columns
df = pd.DataFrame(formatted_data, columns=columns)

# Display the DataFrame, highest Perc0_AvgAcc first
df.sort_values('Perc0_AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,loss_lambda,weights_lambda,weights_elastic,Perc0_AvgAcc,Perc0_StdAcc,...,Perc0_ScannerAcc,Perc0_ScannerAUC,Perc07_ScannerAcc,Perc07_ScannerAUC,Perc08_ScannerAcc,Perc08_ScannerAUC,Perc095_ScannerAcc,Perc095_ScannerAUC,Perc09_ScannerAcc,Perc09_ScannerAUC
7,2,2,2,4,0.5,0.1,10.0,1.0,84.18,0.63,...,0.111111,0.5,0.111111,0.5,0.111111,0.5,0.111111,0.5,0.111111,0.5
21,2,2,2,4,0.5,10.0,1.0,0.1,83.86,0.95,...,0.111111,0.5,0.111111,0.5,0.111111,0.5,0.111111,0.5,0.111111,0.5
12,2,2,2,4,0.5,1.0,1.0,0.1,83.54,1.9,...,0.111111,0.5,0.111111,0.5,0.111111,0.5,0.111111,0.5,0.111111,0.5
13,2,2,2,4,0.5,1.0,1.0,1.0,82.91,2.53,...,0.111111,0.5,0.111111,0.5,0.111111,0.5,0.111111,0.5,0.111111,0.5
3,2,2,2,4,0.5,0.1,1.0,0.1,82.91,0.63,...,0.111111,0.5,0.111111,0.5,0.111111,0.5,0.111111,0.5,0.111111,0.5
16,2,2,2,4,0.5,1.0,10.0,1.0,82.59,0.32,...,0.111111,0.5,0.111111,0.5,0.111111,0.5,0.111111,0.5,0.111111,0.5
10,2,2,2,4,0.5,1.0,0.1,1.0,82.59,0.32,...,0.766667,0.65,0.111111,0.5,0.111111,0.5,0.111111,0.5,0.111111,0.5
22,2,2,2,4,0.5,10.0,1.0,1.0,81.96,1.58,...,0.111111,0.5,0.111111,0.5,0.111111,0.5,0.111111,0.5,0.111111,0.5
1,2,2,2,4,0.5,0.1,0.1,1.0,81.33,1.58,...,0.844444,0.60625,0.111111,0.5,0.111111,0.5,0.111111,0.5,0.111111,0.5
6,2,2,2,4,0.5,0.1,10.0,0.1,81.33,0.32,...,0.111111,0.5,0.111111,0.5,0.111111,0.5,0.111111,0.5,0.111111,0.5


In [122]:
# AvgAcc for each Perc* setting, highest Perc0_AvgAcc first
df.loc[:, ['Perc0_AvgAcc', 'Perc07_AvgAcc', 'Perc08_AvgAcc', 'Perc095_AvgAcc', 'Perc09_AvgAcc']].sort_values('Perc0_AvgAcc', ascending=False)

Unnamed: 0,Perc0_AvgAcc,Perc07_AvgAcc,Perc08_AvgAcc,Perc095_AvgAcc,Perc09_AvgAcc
7,84.18,82.28,81.65,84.81,84.18
21,83.86,85.13,85.13,85.44,85.44
12,83.54,84.49,82.91,83.86,84.49
13,82.91,85.13,85.13,85.13,85.13
3,82.91,85.13,85.13,85.13,85.13
16,82.59,85.13,85.13,85.13,85.13
10,82.59,85.76,85.76,85.44,85.44
22,81.96,85.13,85.13,85.44,85.13
1,81.33,85.13,85.13,85.13,85.13
6,81.33,74.05,68.67,49.68,55.38


In [123]:
# ScannerAcc for each Perc* setting, highest Perc0_ScannerAcc first
df.loc[:, ['Perc0_ScannerAcc', 'Perc07_ScannerAcc', 'Perc08_ScannerAcc', 'Perc095_ScannerAcc', 'Perc09_ScannerAcc']].sort_values('Perc0_ScannerAcc', ascending=False)

Unnamed: 0,Perc0_ScannerAcc,Perc07_ScannerAcc,Perc08_ScannerAcc,Perc095_ScannerAcc,Perc09_ScannerAcc
26,0.877778,0.888889,0.888889,0.888889,0.888889
9,0.877778,0.488889,0.488889,0.488889,0.488889
14,0.877778,0.888889,0.888889,0.888889,0.888889
1,0.844444,0.111111,0.111111,0.111111,0.111111
11,0.844444,0.888889,0.888889,0.888889,0.888889
2,0.833333,0.888889,0.888889,0.888889,0.888889
19,0.833333,0.111111,0.111111,0.111111,0.111111
8,0.833333,0.888889,0.888889,0.888889,0.888889
23,0.811111,0.888889,0.888889,0.888889,0.888889
17,0.811111,0.888889,0.888889,0.888889,0.888889


In [184]:
# GNN with one layer masking; per the file name this is the SpaRef GCN baseline /
# masking / MAE weights run, 'elastic_nullingout' variant (TODO confirm against the run config).
# pickle.load can run arbitrary code while unpickling; only load trusted files.
with open('./modified_XGW-GAT_sparef/save_results_tuning_sparef_gcn_baseline_masking_mae_weights_elastic_nullingout.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return every number found in ``s`` as a list of floats, in order of appearance.

    Integers ("85") and decimals ("85.44", ".5") are recognised, each with an
    optional leading sign. The sign is grouped with both alternatives so that a
    signed integer such as "-5" keeps its sign, exactly like a signed decimal.
    Exponent notation is not recognised. Returns [] when nothing matches.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)", s)]

# Process the data
# The 'perc ...' labels are stripped before parsing so their digits are not read
# as metrics. Order matters: 'perc 0.95' goes before 'perc 0.9', and 'perc 0'
# last, because each shorter label is a prefix of the longer ones.
perc_labels = ['perc 0.7', 'perc 0.8', 'perc 0.95', 'perc 0.9', 'perc 0']
formatted_data = []
for params, metric_strings in results.items():
    parsed = []
    for text in metric_strings:
        for label in perc_labels:
            text = text.replace(label, "")
        parsed += extract_numbers(text)
    formatted_data.append(list(params) + parsed)

# Creating DataFrame with appropriate column names
perc_tags = ['Perc0', 'Perc07', 'Perc08', 'Perc095', 'Perc09']
additional_columns = (
    [f'{tag}_{metric}' for tag in perc_tags
     for metric in ('AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC')]
    + ['MaxAvgAcc', 'MaxAvgAUC']
    + [f'{tag}_{metric}' for tag in perc_tags
       for metric in ('ScannerAcc', 'ScannerAUC')]
)
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout',
           'loss_lambda', 'weights_lambda', 'weights_elastic'] + additional_columns
df = pd.DataFrame(formatted_data, columns=columns)

# Display the DataFrame, highest Perc0_AvgAcc first
df.sort_values('Perc0_AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,loss_lambda,weights_lambda,weights_elastic,Perc0_AvgAcc,Perc0_StdAcc,...,Perc0_ScannerAcc,Perc0_ScannerAUC,Perc07_ScannerAcc,Perc07_ScannerAUC,Perc08_ScannerAcc,Perc08_ScannerAUC,Perc095_ScannerAcc,Perc095_ScannerAUC,Perc09_ScannerAcc,Perc09_ScannerAUC
0,2,2,2,4,0.5,0.1,10,1,81.95,1.95,...,0.862745,0.710227,0.862745,0.5,0.862745,0.5,0.862745,0.5,0.862745,0.5


In [185]:
# AvgAcc for each Perc* setting, highest Perc0_AvgAcc first
df.loc[:, ['Perc0_AvgAcc', 'Perc07_AvgAcc', 'Perc08_AvgAcc', 'Perc095_AvgAcc', 'Perc09_AvgAcc']].sort_values('Perc0_AvgAcc', ascending=False)

Unnamed: 0,Perc0_AvgAcc,Perc07_AvgAcc,Perc08_AvgAcc,Perc095_AvgAcc,Perc09_AvgAcc
0,81.95,85.62,86.29,85.95,85.95


In [186]:
# ScannerAcc for each Perc* setting, highest Perc0_ScannerAcc first
df.loc[:, ['Perc0_ScannerAcc', 'Perc07_ScannerAcc', 'Perc08_ScannerAcc', 'Perc095_ScannerAcc', 'Perc09_ScannerAcc']].sort_values('Perc0_ScannerAcc', ascending=False)

Unnamed: 0,Perc0_ScannerAcc,Perc07_ScannerAcc,Perc08_ScannerAcc,Perc095_ScannerAcc,Perc09_ScannerAcc
0,0.862745,0.862745,0.862745,0.862745,0.862745


In [202]:
# GNN with one layer masking; per the file name this is the GCN baseline /
# masking / MAE weights run, 'elastic_nullingout' variant, stored under the
# modified_XGW-GAT_sparef directory (TODO confirm against the run config).
# pickle.load can run arbitrary code while unpickling; only load trusted files.
with open('./modified_XGW-GAT_sparef/save_results_tuning_gcn_baseline_masking_mae_weights_elastic_nullingout.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return every number found in ``s`` as a list of floats, in order of appearance.

    Integers ("85") and decimals ("85.44", ".5") are recognised, each with an
    optional leading sign. The sign is grouped with both alternatives so that a
    signed integer such as "-5" keeps its sign, exactly like a signed decimal.
    Exponent notation is not recognised. Returns [] when nothing matches.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)", s)]

# Process the data
# The 'perc ...' labels are stripped before parsing so their digits are not read
# as metrics. Order matters: 'perc 0.95' goes before 'perc 0.9', and 'perc 0'
# last, because each shorter label is a prefix of the longer ones.
perc_labels = ['perc 0.7', 'perc 0.8', 'perc 0.95', 'perc 0.9', 'perc 0']
formatted_data = []
for params, metric_strings in results.items():
    parsed = []
    for text in metric_strings:
        for label in perc_labels:
            text = text.replace(label, "")
        parsed += extract_numbers(text)
    formatted_data.append(list(params) + parsed)

# Creating DataFrame with appropriate column names
perc_tags = ['Perc0', 'Perc07', 'Perc08', 'Perc095', 'Perc09']
additional_columns = (
    [f'{tag}_{metric}' for tag in perc_tags
     for metric in ('AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC')]
    + ['MaxAvgAcc', 'MaxAvgAUC']
    + [f'{tag}_{metric}' for tag in perc_tags
       for metric in ('ScannerAcc', 'ScannerAUC')]
)
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout',
           'loss_lambda', 'weights_lambda', 'weights_elastic'] + additional_columns
df = pd.DataFrame(formatted_data, columns=columns)

# Display the DataFrame, highest Perc0_AvgAcc first
df.sort_values('Perc0_AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,loss_lambda,weights_lambda,weights_elastic,Perc0_AvgAcc,Perc0_StdAcc,...,Perc0_ScannerAcc,Perc0_ScannerAUC,Perc07_ScannerAcc,Perc07_ScannerAUC,Perc08_ScannerAcc,Perc08_ScannerAUC,Perc095_ScannerAcc,Perc095_ScannerAUC,Perc09_ScannerAcc,Perc09_ScannerAUC
0,2,2,2,4,0.5,0.1,10,1,82.07,2.27,...,0.833333,0.428571,0.916667,0.471429,0.888889,0.457143,0.916667,0.471429,0.888889,0.457143


In [203]:
# Perc0 AvgAcc next to Perc0 ScannerAcc
df.loc[:, ["Perc0_AvgAcc", "Perc0_ScannerAcc"]]

Unnamed: 0,Perc0_AvgAcc,Perc0_ScannerAcc
0,82.07,0.833333


In [197]:
# AvgAcc for each Perc* setting, highest Perc0_AvgAcc first
# NOTE(review): the stored output of this cell (execution count 197, Perc0_AvgAcc 90.6)
# disagrees with the 82.07 shown for the `df` loaded by cell 202 -- re-run after loading.
df.loc[:, ['Perc0_AvgAcc', 'Perc07_AvgAcc', 'Perc08_AvgAcc', 'Perc095_AvgAcc', 'Perc09_AvgAcc']].sort_values('Perc0_AvgAcc', ascending=False)

Unnamed: 0,Perc0_AvgAcc,Perc07_AvgAcc,Perc08_AvgAcc,Perc095_AvgAcc,Perc09_AvgAcc
0,90.6,89.2,91.07,89.19,90.6


In [180]:
# ScannerAcc for each Perc* setting, highest Perc0_ScannerAcc first
# NOTE(review): the stored output of this cell (execution count 180) shows
# Perc07_ScannerAcc 0.823529, while the `df` loaded by cell 202 displays 0.916667
# -- re-run after loading.
df.loc[:, ['Perc0_ScannerAcc', 'Perc07_ScannerAcc', 'Perc08_ScannerAcc', 'Perc095_ScannerAcc', 'Perc09_ScannerAcc']].sort_values('Perc0_ScannerAcc', ascending=False)

Unnamed: 0,Perc0_ScannerAcc,Perc07_ScannerAcc,Perc08_ScannerAcc,Perc095_ScannerAcc,Perc09_ScannerAcc
0,0.833333,0.823529,0.843137,0.833333,0.823529


# 1024

In [158]:
# GNN with one layer masking; per the file name this is the '1024' SpaRef GCN baseline /
# masking / MAE weights run, 'elastic_nullingout' variant (TODO confirm against the run config).
# pickle.load can run arbitrary code while unpickling; only load trusted files.
with open('./modified_XGW-GAT_1024/save_results_tuning_1024_sparef_gcn_baseline_masking_mae_weights_elastic_nullingout.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return every number found in ``s`` as a list of floats, in order of appearance.

    Integers ("85") and decimals ("85.44", ".5") are recognised, each with an
    optional leading sign. The sign is grouped with both alternatives so that a
    signed integer such as "-5" keeps its sign, exactly like a signed decimal.
    Exponent notation is not recognised. Returns [] when nothing matches.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)", s)]

# Process the data
# The 'perc ...' labels are stripped before parsing so their digits are not read
# as metrics. Order matters: 'perc 0.95' goes before 'perc 0.9', and 'perc 0'
# last, because each shorter label is a prefix of the longer ones.
perc_labels = ['perc 0.7', 'perc 0.8', 'perc 0.95', 'perc 0.9', 'perc 0']
formatted_data = []
for params, metric_strings in results.items():
    parsed = []
    for text in metric_strings:
        for label in perc_labels:
            text = text.replace(label, "")
        parsed += extract_numbers(text)
    formatted_data.append(list(params) + parsed)

# Creating DataFrame with appropriate column names
perc_tags = ['Perc0', 'Perc07', 'Perc08', 'Perc095', 'Perc09']
additional_columns = (
    [f'{tag}_{metric}' for tag in perc_tags
     for metric in ('AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC')]
    + ['MaxAvgAcc', 'MaxAvgAUC']
    + [f'{tag}_{metric}' for tag in perc_tags
       for metric in ('TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC')]
)
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout',
           'loss_lambda', 'weights_lambda', 'weights_elastic'] + additional_columns
df = pd.DataFrame(formatted_data, columns=columns)

# Display the DataFrame, highest Perc0_AvgAcc first
df.sort_values('Perc0_AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,loss_lambda,weights_lambda,weights_elastic,Perc0_AvgAcc,Perc0_StdAcc,...,Perc08_ScannerAcc,Perc08_ScannerAUC,Perc095_TreatmentAcc,Perc095_TreatmentAUC,Perc095_ScannerAcc,Perc095_ScannerAUC,Perc09_TreatmentAcc,Perc09_TreatmentAUC,Perc09_ScannerAcc,Perc09_ScannerAUC
0,2,2,2,4,0.5,10,10,0.5,82.95,0.95,...,0.911765,0.798701,0.817223,0.508148,0.852941,0.554383,0.801406,0.494483,0.901961,0.732955


In [159]:
# Perc0 AvgAcc next to Perc0 ScannerAcc
df.loc[:, ["Perc0_AvgAcc", "Perc0_ScannerAcc"]]

Unnamed: 0,Perc0_AvgAcc,Perc0_ScannerAcc
0,82.95,0.852941


In [167]:
# GNN with one layer masking; per the file name this is the '1024' GCN baseline /
# masking / VAE weights run, 'elastic_nullingout' variant (TODO confirm against the run config).
# pickle.load can run arbitrary code while unpickling; only load trusted files.
with open('./modified_XGW-GAT_1024/save_results_tuning_1024_gcn_baseline_masking_vae_weights_elastic_nullingout.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return every number found in ``s`` as a list of floats, in order of appearance.

    Integers ("85") and decimals ("85.44", ".5") are recognised, each with an
    optional leading sign. The sign is grouped with both alternatives so that a
    signed integer such as "-5" keeps its sign, exactly like a signed decimal.
    Exponent notation is not recognised. Returns [] when nothing matches.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)", s)]

# Process the data
formatted_data = []
for key, values in results.items():
    row = list(key)
    for value in values:
        for perc in ['perc 0.7', 'perc 0.8', 'perc 0.95', 'perc 0.9', 'perc 0' ]:
            value = value.replace(perc, "")
        # get rid of combinations from 0 to 4 of (0, 1), ...
        for i in range(5):
            for j in range(5):
                value = value.replace(f"({i}, {j})", "")
        numbers = extract_numbers(value)
        row.extend(numbers)
    formatted_data.append(row)

# Creating DataFrame with appropriate column names
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout', 'loss_lambda', 'weights_lambda', 'weights_elastic']
additional_columns = ['Perc0_AvgAcc', 'Perc0_StdAcc', 'Perc0_AvgAUC', 'Perc0_StdAUC',
                      'Perc07_AvgAcc', 'Perc07_StdAcc', 'Perc07_AvgAUC', 'Perc07_StdAUC',
                      'Perc08_AvgAcc', 'Perc08_StdAcc', 'Perc08_AvgAUC', 'Perc08_StdAUC',
                      'Perc095_AvgAcc', 'Perc095_StdAcc', 'Perc095_AvgAUC', 'Perc095_StdAUC',
                      'Perc09_AvgAcc', 'Perc09_StdAcc', 'Perc09_AvgAUC', 'Perc09_StdAUC',
                      'MaxAvgAcc', 'MaxAvgAUC',
                      'Perc0_TreatmentAcc', 'Perc0_TreatmentAUC', 'Perc0_ScannerAcc', 'Perc0_ScannerAUC',
                      'Perc07_TreatmentAcc', 'Perc07_TreatmentAUC', 'Perc07_ScannerAcc', 'Perc07_ScannerAUC',
                      'Perc08_TreatmentAcc', 'Perc08_TreatmentAUC', 'Perc08_ScannerAcc', 'Perc08_ScannerAUC',
                      'Perc095_TreatmentAcc', 'Perc095_TreatmentAUC', 'Perc095_ScannerAcc', 'Perc095_ScannerAUC',
                      'Perc09_TreatmentAcc', 'Perc09_TreatmentAUC', 'Perc09_ScannerAcc', 'Perc09_ScannerAUC',
                      "Sim_Masks_0,1", "Sim_Masks_0,2", "Sim_Masks_0,3", "Sim_Masks_0,4",
                      "Sim_Masks_1,0", "Sim_Masks_1,2", "Sim_Masks_1,3", "Sim_Masks_1,4",
                      "Sim_Masks_2,0", "Sim_Masks_2,1", "Sim_Masks_2,3", "Sim_Masks_2,4",
                      "Sim_Masks_3,0", "Sim_Masks_3,1", "Sim_Masks_3,2", "Sim_Masks_3,4",
                      "Sim_Masks_4,0", "Sim_Masks_4,1", "Sim_Masks_4,2", "Sim_Masks_4,3"]
columns.extend(additional_columns)
df = pd.DataFrame(formatted_data, columns=columns)

# Display the DataFrame
df.sort_values(by='Perc0_AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,loss_lambda,weights_lambda,weights_elastic,Perc0_AvgAcc,Perc0_StdAcc,...,"Sim_Masks_2,3","Sim_Masks_2,4","Sim_Masks_3,0","Sim_Masks_3,1","Sim_Masks_3,2","Sim_Masks_3,4","Sim_Masks_4,0","Sim_Masks_4,1","Sim_Masks_4,2","Sim_Masks_4,3"
0,2,2,2,4,0.5,10,10,0.5,83.29,2.04,...,0.3061,0.375,0.3061,0.3061,0.3061,0.5644,0.375,0.375,0.375,0.5644


In [168]:
# Side-by-side view of the Perc0_AvgAcc and Perc0_ScannerAcc columns
summary_cols = ["Perc0_AvgAcc", "Perc0_ScannerAcc"]
df[summary_cols]

Unnamed: 0,Perc0_AvgAcc,Perc0_ScannerAcc
0,83.29,0.960784


In [175]:
# Every Sim_Masks_i,j column for ordered pairs i != j (i, j in 0..4),
# listed row by row: (0,1), (0,2), ..., (4,3)
sim_mask_cols = [f"Sim_Masks_{i},{j}" for i in range(5) for j in range(5) if i != j]
df[sim_mask_cols]

Unnamed: 0,"Sim_Masks_0,1","Sim_Masks_0,2","Sim_Masks_0,3","Sim_Masks_0,4","Sim_Masks_1,0","Sim_Masks_1,2","Sim_Masks_1,3","Sim_Masks_1,4","Sim_Masks_2,0","Sim_Masks_2,1","Sim_Masks_2,3","Sim_Masks_2,4","Sim_Masks_3,0","Sim_Masks_3,1","Sim_Masks_3,2","Sim_Masks_3,4","Sim_Masks_4,0","Sim_Masks_4,1","Sim_Masks_4,2","Sim_Masks_4,3"
0,1.0,1.0,0.3061,0.375,1.0,1.0,0.3061,0.375,1.0,1.0,0.3061,0.375,0.3061,0.3061,0.3061,0.5644,0.375,0.375,0.375,0.5644


In [176]:
# Mean of the last 20 parsed values in the first result row.
# NOTE(review): presumably these are the 20 Sim_Masks_* entries, assuming
# `formatted_data` still comes from the cell that parsed them -- confirm.
first_row_tail = formatted_data[0][-20:]
np.mean(first_row_tail)

0.56077