In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle as pkl
import re

# Fully Connected

In [16]:
# Fully connected without masking
# Unpickle this configuration's tuning results into `results`; the rest of the
# cell walks it as a mapping via `results.items()`.
# NOTE(review): unpickling can execute arbitrary code -- only open result files
# that come from a trusted source.
with open('./modified_XGW-GAT/save_results_tuning_fc.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return every number found in ``s`` as a list of floats, in order of appearance.

    Recognises optionally signed decimals (``85.44``, ``-.5``) and optionally
    signed integers (``3``, ``-3``). The optional sign is grouped with both
    alternatives of the pattern; previously it was attached to the decimal
    alternative only, so a negative integer such as ``"-3"`` was read as ``3.0``.

    Args:
        s: Text to scan.

    Returns:
        list[float]: The parsed numbers; empty when ``s`` contains no digits.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)", s)]

# Flatten every entry of `results` into one row: the key's elements first,
# followed by all numbers parsed from each of its value strings
formatted_data = [
    list(key) + [number for value in values for number in extract_numbers(value)]
    for key, values in results.items()
]

# Column layout: the five key fields, then the ten parsed metric fields
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC', 'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout'] + additional_columns
df_gcn = pd.DataFrame(data=formatted_data, columns=columns)

# Cell output: rows ordered by the AvgAcc column, highest first
df_gcn.sort_values(by='AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,AvgAcc,StdAcc,AvgAUC,StdAUC,MaxAcc,MaxAUC,TreatmentAcc,TreatmentAUC,ScannerAcc,ScannerAUC
21,4,,,64,,85.44,0.63,55.5,0.28,86.08,55.78,0.789104,0.601247,0.888889,0.71875
5,2,,,64,,85.44,0.0,54.67,1.48,85.44,56.16,0.792619,0.587427,0.888889,0.71875
28,8,,,32,,85.13,0.32,55.7,1.0,85.44,56.7,0.611599,0.633333,0.677778,0.64375
27,8,,,16,,84.81,1.27,58.98,1.96,86.08,60.95,0.822496,0.517806,0.911111,0.64375
30,8,,,128,,84.81,0.63,55.62,2.88,85.44,58.5,0.789104,0.565366,0.9,0.725
22,4,,,128,,84.49,0.32,53.3,0.12,84.81,53.42,0.794376,0.528691,0.911111,0.6875
14,3,,,128,,84.49,1.58,55.65,0.68,86.08,56.33,0.782074,0.581044,0.888889,0.71875
13,3,,,64,,84.49,0.32,52.76,1.02,84.81,53.78,0.794376,0.596465,0.888889,0.71875
12,3,,,32,,84.49,0.32,55.9,0.8,84.81,56.7,0.792619,0.599387,0.888889,0.71875
20,4,,,32,,84.49,0.32,56.27,0.26,84.81,56.53,0.810193,0.538266,0.922222,0.69375


In [17]:
# Fully connected with one layer masking
# Unpickle this configuration's tuning results into `results`; the rest of the
# cell walks it as a mapping via `results.items()`.
# NOTE(review): unpickling can execute arbitrary code -- only open result files
# that come from a trusted source.
with open('./modified_XGW-GAT/save_results_tuning_fc_masking.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return every number found in ``s`` as a list of floats, in order of appearance.

    Recognises optionally signed decimals (``85.44``, ``-.5``) and optionally
    signed integers (``3``, ``-3``). The optional sign is grouped with both
    alternatives of the pattern; previously it was attached to the decimal
    alternative only, so a negative integer such as ``"-3"`` was read as ``3.0``.

    Args:
        s: Text to scan.

    Returns:
        list[float]: The parsed numbers; empty when ``s`` contains no digits.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)", s)]

# Flatten every entry of `results` into one row: the key's elements first,
# followed by all numbers parsed from each of its value strings
formatted_data = [
    list(key) + [number for value in values for number in extract_numbers(value)]
    for key, values in results.items()
]

# Column layout: the five key fields, then the ten parsed metric fields
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC', 'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout'] + additional_columns
df_gcn = pd.DataFrame(data=formatted_data, columns=columns)

# Cell output: rows ordered by the AvgAcc column, highest first
df_gcn.sort_values(by='AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,AvgAcc,StdAcc,AvgAUC,StdAUC,MaxAcc,MaxAUC,TreatmentAcc,TreatmentAUC,ScannerAcc,ScannerAUC
0,2,,,2,,85.44,0.0,57.36,2.47,85.44,59.83,0.720562,0.619557,0.822222,0.68125
26,8,,,8,,85.44,0.0,55.76,0.2,85.44,55.96,0.773286,0.579712,0.877778,0.7125
30,8,,,128,,85.13,0.32,57.34,1.65,85.44,58.99,0.760984,0.588212,0.888889,0.675
5,2,,,64,,85.13,0.32,56.5,0.03,85.44,56.53,0.787346,0.584236,0.922222,0.7375
6,2,,,128,,84.81,0.0,54.1,0.84,84.81,54.94,0.790861,0.574404,0.911111,0.6875
23,4,,,256,,84.81,0.0,53.49,0.81,84.81,54.3,0.794376,0.588491,0.911111,0.6875
22,4,,,128,,84.81,0.0,56.01,1.11,84.81,57.12,0.817223,0.550494,0.922222,0.69375
20,4,,,32,,84.81,0.63,56.41,1.98,85.44,58.4,0.655536,0.588169,0.788889,0.6625
18,4,,,8,,84.81,0.63,57.37,2.15,85.44,59.51,0.752197,0.606813,0.866667,0.70625
14,3,,,128,,84.49,0.95,58.09,0.43,85.44,58.52,0.648506,0.583914,0.8,0.7125


# GCN / GNN

In [13]:
# GNN without masking
# Unpickle this configuration's tuning results into `results`; the rest of the
# cell walks it as a mapping via `results.items()`.
# NOTE(review): unpickling can execute arbitrary code -- only open result files
# that come from a trusted source.
with open('./modified_XGW-GAT/save_results_tuning_gcn.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return every number found in ``s`` as a list of floats, in order of appearance.

    Recognises optionally signed decimals (``85.44``, ``-.5``) and optionally
    signed integers (``3``, ``-3``). The optional sign is grouped with both
    alternatives of the pattern; previously it was attached to the decimal
    alternative only, so a negative integer such as ``"-3"`` was read as ``3.0``.

    Args:
        s: Text to scan.

    Returns:
        list[float]: The parsed numbers; empty when ``s`` contains no digits.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)", s)]

# Flatten every entry of `results` into one row: the key's elements first,
# followed by all numbers parsed from each of its value strings
formatted_data = [
    list(key) + [number for value in values for number in extract_numbers(value)]
    for key, values in results.items()
]

# Column layout: the five key fields, then the ten parsed metric fields
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC', 'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout'] + additional_columns
df_gcn = pd.DataFrame(data=formatted_data, columns=columns)

# Cell output: rows ordered by the AvgAcc column, highest first
df_gcn.sort_values(by='AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,AvgAcc,StdAcc,AvgAUC,StdAUC,MaxAcc,MaxAUC,TreatmentAcc,TreatmentAUC,ScannerAcc,ScannerAUC
8,3,2,2,2,0.5,81.96,2.22,54.86,2.28,84.18,57.13,0.651408,0.561801,0.8,0.625
7,2,3,4,4,0.5,80.7,3.48,58.14,3.39,84.18,61.53,0.77993,0.520094,0.866667,0.6625
3,2,2,4,4,0.5,80.38,0.0,55.35,0.47,80.38,55.81,0.786972,0.576156,0.855556,0.65625
9,3,2,2,4,0.5,80.06,0.95,57.33,0.42,81.01,57.74,0.75,0.525877,0.877778,0.66875
11,3,2,4,4,0.5,80.06,0.32,54.98,1.24,80.38,56.22,0.757042,0.577954,0.911111,0.775
14,3,3,4,2,0.5,79.11,0.63,59.2,0.22,79.75,59.42,0.757042,0.534126,0.888889,0.7625
6,2,3,4,2,0.5,78.8,4.11,53.06,3.06,82.91,56.12,0.746479,0.535698,0.877778,0.66875
15,3,3,4,4,0.5,78.48,2.53,56.83,0.08,81.01,56.9,0.77993,0.496188,0.9,0.725
2,2,2,4,2,0.5,77.85,0.63,59.54,2.9,78.48,62.44,0.751761,0.558818,0.866667,0.6625
12,3,3,2,2,0.5,77.53,2.22,62.14,4.37,79.75,66.51,0.730634,0.573916,0.822222,0.76875


In [14]:
# GNN with one layer masking
# Unpickle this configuration's tuning results into `results`; the rest of the
# cell walks it as a mapping via `results.items()`.
# NOTE(review): unpickling can execute arbitrary code -- only open result files
# that come from a trusted source.
with open('./modified_XGW-GAT/save_results_tuning_gcn_masking.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return every number found in ``s`` as a list of floats, in order of appearance.

    Recognises optionally signed decimals (``85.44``, ``-.5``) and optionally
    signed integers (``3``, ``-3``). The optional sign is grouped with both
    alternatives of the pattern; previously it was attached to the decimal
    alternative only, so a negative integer such as ``"-3"`` was read as ``3.0``.

    Args:
        s: Text to scan.

    Returns:
        list[float]: The parsed numbers; empty when ``s`` contains no digits.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)", s)]

# Flatten every entry of `results` into one row: the key's elements first,
# followed by all numbers parsed from each of its value strings
formatted_data = [
    list(key) + [number for value in values for number in extract_numbers(value)]
    for key, values in results.items()
]

# Column layout: the five key fields, then the ten parsed metric fields
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC', 'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout'] + additional_columns
df_gcn = pd.DataFrame(data=formatted_data, columns=columns)

# Cell output: rows ordered by the AvgAcc column, highest first
df_gcn.sort_values(by='AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,AvgAcc,StdAcc,AvgAUC,StdAUC,MaxAcc,MaxAUC,TreatmentAcc,TreatmentAUC,ScannerAcc,ScannerAUC
8,3,2,2,2,0.5,82.59,2.22,55.76,1.9,84.81,57.67,0.772887,0.511846,0.888889,0.63125
12,3,3,2,2,0.5,81.96,0.32,60.04,1.05,82.28,61.1,0.723592,0.557699,0.855556,0.74375
14,3,3,4,2,0.5,81.96,2.22,60.85,3.51,84.18,64.37,0.790493,0.530475,0.911111,0.73125
5,2,3,2,4,0.5,80.7,0.32,56.04,3.15,81.01,59.19,0.78169,0.568973,0.888889,0.7625
9,3,2,2,4,0.5,80.7,0.32,55.34,2.73,81.01,58.07,0.765845,0.51555,0.888889,0.675
3,2,2,4,4,0.5,80.38,0.0,55.64,2.5,80.38,58.13,0.792254,0.579354,0.866667,0.70625
7,2,3,4,4,0.5,79.75,2.53,60.84,0.01,82.28,60.85,0.776408,0.5339,0.866667,0.6625
13,3,3,2,4,0.5,79.43,2.85,59.06,3.69,82.28,62.75,0.794014,0.548545,0.877778,0.66875
15,3,3,4,4,0.5,79.43,3.48,55.7,2.23,82.91,57.93,0.785211,0.535246,0.877778,0.66875
6,2,3,4,2,0.5,78.8,2.22,53.21,3.21,81.01,56.43,0.723592,0.521839,0.866667,0.6625


In [18]:
# GNN with Masked Autoencoder
# Unpickle this configuration's tuning results into `results`; the rest of the
# cell walks it as a mapping via `results.items()`.
# NOTE(review): unpickling can execute arbitrary code -- only open result files
# that come from a trusted source.
with open('./modified_XGW-GAT/save_results_tuning_gcn_mae.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return every number found in ``s`` as a list of floats, in order of appearance.

    Recognises optionally signed decimals (``85.44``, ``-.5``) and optionally
    signed integers (``3``, ``-3``). The optional sign is grouped with both
    alternatives of the pattern; previously it was attached to the decimal
    alternative only, so a negative integer such as ``"-3"`` was read as ``3.0``.

    Args:
        s: Text to scan.

    Returns:
        list[float]: The parsed numbers; empty when ``s`` contains no digits.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)", s)]

# Flatten every entry of `results` into one row: the key's elements first,
# followed by all numbers parsed from each of its value strings
formatted_data = [
    list(key) + [number for value in values for number in extract_numbers(value)]
    for key, values in results.items()
]

# Column layout: the five key fields, then the ten parsed metric fields
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC', 'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout'] + additional_columns
df_gcn = pd.DataFrame(data=formatted_data, columns=columns)

# Cell output: rows ordered by the AvgAcc column, highest first
df_gcn.sort_values(by='AvgAcc', ascending=False)

Unnamed: 0,MLP_layers,GNN_layers,num_head,hidden_dim,dropout,AvgAcc,StdAcc,AvgAUC,StdAUC,MaxAcc,MaxAUC,TreatmentAcc,TreatmentAUC,ScannerAcc,ScannerAUC
1,2,2,2,4,0.5,83.86,1.58,64.32,2.9,85.44,67.23,0.774648,0.528849,0.944444,0.88125
2,2,2,4,2,0.5,82.91,0.63,56.91,2.66,83.54,59.58,0.739437,0.503543,0.855556,0.7
11,3,2,4,4,0.5,82.91,1.27,56.91,1.52,84.18,58.43,0.772887,0.555674,0.877778,0.75625
15,3,3,4,4,0.5,82.91,1.27,55.49,0.17,84.18,55.66,0.799296,0.571665,0.888889,0.71875
14,3,3,4,2,0.5,82.59,0.32,57.57,0.08,82.91,57.65,0.746479,0.535698,0.888889,0.7625
12,3,3,2,2,0.5,81.96,1.58,54.06,0.89,83.54,54.94,0.739437,0.55534,0.844444,0.69375
5,2,3,2,4,0.5,81.33,2.22,62.17,3.65,83.54,65.81,0.764085,0.578234,0.888889,0.80625
3,2,2,4,4,0.5,80.38,0.63,59.93,1.48,81.01,61.41,0.765845,0.547425,0.922222,0.78125
9,3,2,2,4,0.5,80.38,1.27,55.51,2.24,81.65,57.75,0.698944,0.526836,0.855556,0.7
10,3,2,4,2,0.5,80.06,0.95,63.93,0.66,81.01,64.58,0.764085,0.558312,0.911111,0.81875


# GAT

In [None]:
# GAT without masking
# Unpickle this configuration's tuning results into `results`; the rest of the
# cell walks it as a mapping via `results.items()`.
# NOTE(review): unpickling can execute arbitrary code -- only open result files
# that come from a trusted source.
with open('./modified_XGW-GAT/save_results_tuning_gat.pkl', 'rb') as f:
    results = pkl.load(f)

# Function to extract numbers from a string
def extract_numbers(s):
    """Return every number found in ``s`` as a list of floats, in order of appearance.

    Recognises optionally signed decimals (``85.44``, ``-.5``) and optionally
    signed integers (``3``, ``-3``). The optional sign is grouped with both
    alternatives of the pattern; previously it was attached to the decimal
    alternative only, so a negative integer such as ``"-3"`` was read as ``3.0``.

    Args:
        s: Text to scan.

    Returns:
        list[float]: The parsed numbers; empty when ``s`` contains no digits.
    """
    return [float(n) for n in re.findall(r"[-+]?(?:\d*\.\d+|\d+)", s)]

# Flatten every entry of `results` into one row: the key's elements first,
# followed by all numbers parsed from each of its value strings
formatted_data = [
    list(key) + [number for value in values for number in extract_numbers(value)]
    for key, values in results.items()
]

# Column layout: the five key fields, then the ten parsed metric fields
additional_columns = ['AvgAcc', 'StdAcc', 'AvgAUC', 'StdAUC', 'MaxAcc', 'MaxAUC', 'TreatmentAcc', 'TreatmentAUC', 'ScannerAcc', 'ScannerAUC']
columns = ['MLP_layers', 'GNN_layers', 'num_head', 'hidden_dim', 'dropout'] + additional_columns
df_gcn = pd.DataFrame(data=formatted_data, columns=columns)

# Cell output: rows ordered by the AvgAcc column, highest first
df_gcn.sort_values(by='AvgAcc', ascending=False)