In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install torch torchvision torchaudio
!pip install torch-geometric
!pip install torchviz

Collecting torch-geometric
  Downloading torch_geometric-2.4.0-py3-none-any.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m14.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch-geometric
Successfully installed torch-geometric-2.4.0
Collecting torchviz
  Downloading torchviz-0.0.2.tar.gz (4.9 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: torchviz
  Building wheel for torchviz (setup.py) ... [?25l[?25hdone
  Created wheel for torchviz: filename=torchviz-0.0.2-py3-none-any.whl size=4131 sha256=01e1c2afef1e1916ec0ca1efcb57da3c3099c5fcd1619b8e6ad258d159fbf54f
  Stored in directory: /root/.cache/pip/wheels/4c/97/88/a02973217949e0db0c9f4346d154085f4725f99c4f15a87094
Successfully built torchviz
Installing collected packages: torchviz
Successfully installed torchviz-0.0.2


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from torch_geometric.nn import GCNConv
import torch.nn.functional as F
from torch_geometric.datasets import GNNBenchmarkDataset
import torch_geometric.nn as geo_nn
from torch_geometric.data import DataLoader
import networkx as nx
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, global_mean_pool
from torch_geometric.data import Data
from torch_geometric.transforms import Constant
from tqdm.auto import tqdm
from itertools import combinations
import numpy as np

#  HELPERS

In [None]:

# Defining some data preprocessing functions
def graph_to_pyg_data(graph):
    edge_index = torch.tensor(list(graph.edges)).t().contiguous()
    x = torch.eye(graph.number_of_nodes())  # Node features, identity matrix in this example
    y = torch.tensor(list(nx.get_node_attributes(graph, 'label').values()))  # Node labels

    return Data(x=x, edge_index=edge_index, y=y)

def to_networkx(data):
    edge_index = data.edge_index.cpu().numpy()
    edge_attr = None
    if data.edge_attr is not None:
        edge_attr = data.edge_attr.cpu().numpy()

    G = nx.Graph()
    G.add_nodes_from(range(data.num_nodes))
    G.add_edges_from(edge_index.T)

    if edge_attr is not None:
        for i, (src, tgt) in enumerate(edge_index.T):
            G[src][tgt]['edge_attr'] = edge_attr[i]

    return G

    # min max normalize [0, 1]
def min_max_normalize(data, new_min=0, new_max=1):
    # Find the min and max values in the data
    min_val = min(data)
    max_val = max(data)
    if min_val == max_val:
      return [1 for _ in range(len(data))]
    normalized_data = [(x - min_val) / (max_val - min_val) * (new_max - new_min) + new_min for x in data]
    return normalized_data

# top k% of values are 1 rest are 0
def top_k(label_list, k):
    k = round(k*len(label_list))
    input_list = min_max_normalize(label_list)
    # Sort the list in descending order
    sorted_list = sorted(input_list, reverse=True)
    # Determine the threshold index
    threshold_index = min(k, len(sorted_list))
    # Set the first k elements to 1 and the rest to 0
    thresholded_list = [1 if i < threshold_index else 0 for i in range(len(sorted_list))]
    # Create a mapping from original indices to sorted indices
    index_mapping = {original: sorted_index for sorted_index, original in enumerate(sorted(range(len(input_list)), key=lambda x: input_list[x], reverse=True))}
    # Sort the thresholded list back to the original order
    thresholded_list_original_order = [thresholded_list[index_mapping[i]] for i in range(len(input_list))]

    return thresholded_list_original_order

# continuous [0,1]
def continuous(label_list, k):
  label_list = min_max_normalize(label_list)
  return label_list

# value above k are 1 rest are 0
def within_k(label_list, k):
  k = 1-k
  label_list = min_max_normalize(label_list)
  data = [1 if x > k else 0 for x in label_list]
  return data


def max_normalize_binary(label_list, k):
  k = 1-k
  # normalize data
  max_val = max(label_list)
  normalized_data = [(x / max_val) for x in label_list]
  data = [1 if x > k else 0 for x in normalized_data]
  return data

def max_normalize(label_list, k):
  # normalize data
  max_val = max(label_list)
  normalized_data = [(x / max_val) for x in label_list]
  return normalized_data

# Normalize dataset function
def dataset_normalize(dataset, normalize_function, normalize_param):
  for graph_idx in tqdm(range(len(dataset))):
    data = dataset[graph_idx]
    node_labels_list = data.y.tolist()
    normalized_node_labels_list = normalize_function(node_labels_list, normalize_param)
    data.y = torch.tensor(normalized_node_labels_list)
  return dataset

# TAKES IN A NETWORKX GRAPH AND OUTPUTS A TENSOR THAT IS THE NODE FEATURES
def get_features(nxgraph):
  # get features
  node_degrees = dict(nxgraph.degree())
  degree_centrality = nx.degree_centrality(nxgraph)
  betweenness_centrality = nx.betweenness_centrality(nxgraph)
  closeness_centrality = nx.closeness_centrality(nxgraph)
  eigenvector_centrality = nx.eigenvector_centrality(nxgraph)
  pagerank_centrality = nx.pagerank(nxgraph)
  harmonic_centrality = nx.harmonic_centrality(nxgraph)
  load_centrality = nx.load_centrality(nxgraph)
  clustering_coefficient = nx.clustering(nxgraph)
  # make it into an array
  features_array = np.array([
    list(node_degrees.values()),
    list(degree_centrality.values()),
    list(betweenness_centrality.values()),
    list(closeness_centrality.values()),
    list(eigenvector_centrality.values()),
    list(pagerank_centrality.values()),
    list(harmonic_centrality.values()),
    list(load_centrality.values()),
    list(clustering_coefficient.values())])
  features_array = features_array.T
  return torch.tensor(features_array, dtype=torch.float32)


In [None]:
# GCN Layer

class GCNLayer(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(GCNLayer, self).__init__()
        self.gcn = GCNConv(input_size, hidden_size)

    def forward(self, x, edge_index):
        return self.gcn(x, edge_index)


In [None]:
# Dataset

class PairwiseRankingDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

In [None]:
# Load training data
loaded_dataset = torch.load('/content/drive/MyDrive/Colab Notebooks/Decomp/LabeledData/NetworkxGraphs/train_200nodes_500graphs.pt')
normalize_function = max_normalize_binary
normalize_param = 0.2 # top 20% of nodes are 1 rest are 0
dataset_normalize(loaded_dataset, normalize_function=normalize_function, normalize_param=normalize_param)
labels = torch.empty(0, dtype=torch.float32)
features = torch.empty(0, dtype=torch.float32)
for graph in tqdm(loaded_dataset):
    temp_labels = graph.y
    labels = torch.cat([labels, temp_labels], dim=0)
    graph.x = get_features(to_networkx(graph))
    temp_features = graph.x
    features = torch.cat([features, temp_features], dim=0)

print(labels.shape)
print(features.shape)

train_graphs = loaded_dataset
torch.save(train_graphs, '/content/drive/MyDrive/Colab Notebooks/Decomp/LabeledData/NetworkxGraphs/train_200nodes_500graphs_features.pt')

In [None]:
# Load val data
loaded_dataset = torch.load('/content/drive/MyDrive/Colab Notebooks/Decomp/LabeledData/NetworkxGraphs/val_200nodes_200graphs.pt')
normalize_function = max_normalize_binary
normalize_param = 0.2 # top 20% of nodes are 1 rest are 0
dataset_normalize(loaded_dataset, normalize_function=normalize_function, normalize_param=normalize_param)
labels = torch.empty(0, dtype=torch.float32)
features = torch.empty(0, dtype=torch.float32)
for graph in tqdm(loaded_dataset):
    temp_labels = graph.y
    labels = torch.cat([labels, temp_labels], dim=0)
    graph.x = get_features(to_networkx(graph))
    temp_features = graph.x
    features = torch.cat([features, temp_features], dim=0)

print(labels.shape)
print(features.shape)

val_graphs = loaded_dataset
torch.save(val_graphs, '/content/drive/MyDrive/Colab Notebooks/Decomp/LabeledData/NetworkxGraphs/val_200nodes_200graphs_features.pt')

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

torch.Size([40000])
torch.Size([40000, 9])


In [None]:
train_graphs = torch.load('/content/drive/MyDrive/Colab Notebooks/Decomp/LabeledData/NetworkxGraphs/train_200nodes_500graphs_features.pt')
val_graphs = torch.load('/content/drive/MyDrive/Colab Notebooks/Decomp/LabeledData/NetworkxGraphs/val_200nodes_200graphs_features.pt')

In [None]:
import torch
from torchviz import make_dot
from IPython.display import Image

batch_size = 2
epochs = 50
input_size = 9
gcn_hidden_size = 256
model = SimpleRankModel(input_size, gcn_hidden_size)

# Dummy input
dummy_input = train_graphs[0]

# Generate a graph of the model architecture
graph = make_dot(model(dummy_input), params=dict(model.named_parameters()))

# Save the graph to a file (e.g., PNG)
graph.render("model_graph", format="png", cleanup=True)

# Display the saved image
Image("model_graph.png")

# Simple Rank

In [None]:
class SimpleRankModel(nn.Module):
    def __init__(self, input_size, gcn_hidden_size, dropout_prob=0.2):
        super(SimpleRankModel, self).__init__()
        # GCN layers
        self.gcn1 = GCNLayer(input_size, gcn_hidden_size)
        self.gcn2 = GCNLayer(gcn_hidden_size, gcn_hidden_size)
        self.gcn3 = GCNLayer(gcn_hidden_size, gcn_hidden_size)

        self.fc1 = nn.Linear(gcn_hidden_size, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 2)
        self.sigmoid = nn.Sigmoid()
        self.dropout = nn.Dropout(p=dropout_prob)
        self.batch_norm1 = nn.BatchNorm1d(128)
        self.batch_norm2 = nn.BatchNorm1d(64)
        self.batch_norm3 = nn.BatchNorm1d(32)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        # GCN layers
        x = self.gcn1(x, edge_index)
        x = self.relu(x)
        x = self.gcn2(x, edge_index)
        x = self.relu(x)

        # MLP layers
        x = self.batch_norm1(self.fc1(x))
        x = self.relu(x)
        x = self.dropout(x)
        x = self.batch_norm2(self.fc2(x))
        x = self.relu(x)
        x = self.dropout(x)
        x = self.batch_norm3(self.fc3(x))
        x = self.relu(x)
        x = self.fc4(x)
        return x

def simple_ranking_loss(scores, labels):
    # Calculate pairwise differences only for positive pairs (labels == 1)
    pairwise_diff = scores[labels == 1].view(-1, 1) - scores[labels == 0].view(1, -1)
    return -torch.mean(nn.functional.logsigmoid(pairwise_diff))

# Function to calculate accuracy
def calculate_accuracy(outputs, labels):
    predictions = outputs
    TP = ((predictions == 1) & (labels == 1)).sum().item()
    FP = ((predictions == 1) & (labels == 0)).sum().item()
    if TP + FP == 0:
        TPR = 1
    else:
        TPR = TP / (TP + FP)
    correct = (predictions == labels).sum().item()
    total = labels.size(0)
    accuracy = correct / total
    return accuracy, TPR


def train_model_simple(model, train_loader, val_loader, optimizer, criterion, batch_size, epochs=10):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    train_losses = []
    val_losses = []
    train_accuracies = []
    val_accuracies = []
    val_tprs = []
    train_tprs = []
    base_path = '/content/drive/MyDrive/Colab Notebooks/Decomp/Models/simplerank'
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        start = 0
        train_epoch_loss = 0.0
        train_epoch_accuracy = 0.0
        train_epoch_tprs = 0.0
        total_length = len(train_loader) // batch_size
        for i in range(start, len(train_loader), batch_size):
            optimizer.zero_grad()
            scores = torch.empty(0, dtype=torch.float32, requires_grad=True).to(device)
            labels = torch.empty(0, dtype=torch.float32, requires_grad=True).to(device)
            for j in range(start, start + batch_size):
                if j >= len(train_loader):
                    break
                graph = train_loader[j]
                temp = model(graph)
                temp_softmax = F.softmax(temp, dim=1)
                temp2 = torch.argmax(temp_softmax, dim=1)
                scores = torch.cat([scores, temp2], dim=0)
                temp = graph.y
                labels = torch.cat([labels, temp], dim=0)
            start += batch_size
            scores = scores.squeeze().to(device)
            loss = criterion(scores, labels)
            train_epoch_loss += loss.item()

            # Calculate accuracy
            accuracy,tpr = calculate_accuracy(scores, labels)
            train_epoch_tprs += tpr
            train_epoch_accuracy += accuracy

            # Backward pass and optimization
            loss.backward()
            optimizer.step()
        train_tprs.append(train_epoch_tprs / total_length)
        average_train_loss = train_epoch_loss / total_length
        average_train_accuracy = train_epoch_accuracy / total_length
        train_losses.append(average_train_loss)
        train_accuracies.append(average_train_accuracy)

        # Validation
        model.eval()
        val_epoch_loss = 0.0
        val_epoch_accuracy = 0.0
        val_epoch_tpr = 0.0
        test_length = len(val_loader) // batch_size
        with torch.no_grad():
            start = 0
            for i in range(start, len(val_loader), batch_size):
                graph.to(device)
                optimizer.zero_grad()
                scores = torch.empty(0, dtype=torch.float32, requires_grad=True).to(device)
                labels = torch.empty(0, dtype=torch.float32, requires_grad=True).to(device)
                for j in range(start, start + batch_size):
                    if j >= len(val_loader):
                        break
                    graph = val_loader[j]
                    temp = model(graph)
                    temp_softmax = F.softmax(temp, dim=1)
                    temp2 = torch.argmax(temp_softmax, dim=1)
                    scores = torch.cat([scores, temp2], dim=0)
                    labels = torch.cat([labels, graph.y], dim=0)
                start += batch_size
                scores = scores.squeeze().to(device)
                loss = criterion(scores, labels)
                val_epoch_loss += loss.item()

                # Calculate accuracy
                accuracy, tpr = calculate_accuracy(scores, labels)
                val_epoch_accuracy += accuracy
                val_epoch_tpr += tpr


        val_tprs.append(val_epoch_tpr / test_length)
        average_val_loss = val_epoch_loss / test_length
        average_val_accuracy = val_epoch_accuracy / test_length
        val_losses.append(average_val_loss)
        val_accuracies.append(average_val_accuracy)
        save_path = f'{base_path}_{epoch+1}.pth'
        torch.save(model.state_dict(), save_path)

        # Print progress
        print(f'Epoch [{epoch+1}/{epochs}], '
              f'Train Loss: {average_train_loss:.4f}, Train Acc: {average_train_accuracy:.4f}, '
              f'Train TPR: {train_epoch_tprs / total_length:.4f}, '
              f'Val Loss: {average_val_loss:.4f}, Val Acc: {average_val_accuracy:.4f}, '
              f'Val TPR: {val_epoch_tpr / test_length:.4f}')
    # Plot the training and validation loss curves
    plt.plot(range(1, epochs + 1), train_losses, label='Training Loss')
    plt.plot(range(1, epochs + 1), val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()


#  TRAIN SIMPLERANK

In [None]:
batch_size = 128
epochs = 50
input_size = 9
gcn_hidden_size = 256
simple_rank_model = SimpleRankModel(input_size, gcn_hidden_size)
criterion = simple_ranking_loss
optimizer = optim.Adam(simple_rank_model.parameters(), lr=0.01)

train_model_simple(simple_rank_model, train_graphs, val_graphs, optimizer, criterion, batch_size=batch_size, epochs=epochs)

Epoch [1/50], Train Loss: 1.0063, Train Acc: 0.6260, Train TPR: 0.5589, Val Loss: 1.3876, Val Acc: 0.9700, Val TPR: 0.8911
Epoch [2/50], Train Loss: 1.0058, Train Acc: 0.6259, Train TPR: 0.5592, Val Loss: 1.3871, Val Acc: 0.9718, Val TPR: 0.8918
Epoch [3/50], Train Loss: 1.0063, Train Acc: 0.6258, Train TPR: 0.5589, Val Loss: 1.3885, Val Acc: 0.9681, Val TPR: 0.8901
Epoch [4/50], Train Loss: 1.0086, Train Acc: 0.6228, Train TPR: 0.5571, Val Loss: 1.3890, Val Acc: 0.9711, Val TPR: 0.8912
Epoch [5/50], Train Loss: 1.0046, Train Acc: 0.6263, Train TPR: 0.5599, Val Loss: 1.3902, Val Acc: 0.9748, Val TPR: 0.8923
Epoch [6/50], Train Loss: 1.0079, Train Acc: 0.6239, Train TPR: 0.5577, Val Loss: 1.4042, Val Acc: 0.9670, Val TPR: 0.8862
Epoch [7/50], Train Loss: 1.0058, Train Acc: 0.6255, Train TPR: 0.5591, Val Loss: 1.3931, Val Acc: 0.9720, Val TPR: 0.8906
Epoch [8/50], Train Loss: 1.0022, Train Acc: 0.6292, Train TPR: 0.5617, Val Loss: 1.3851, Val Acc: 0.9744, Val TPR: 0.8933
Epoch [9/50], Tr

KeyboardInterrupt: ignored

#  Binary Class Model

In [None]:
class BinaryClassificationModel(nn.Module):
    def __init__(self, input_size, gcn_hidden_size, dropout_prob=0.3):
        super(BinaryClassificationModel, self).__init__()
        # GCN layers
        self.gcn1 = GCNLayer(input_size, gcn_hidden_size)
        self.gcn2 = GCNLayer(gcn_hidden_size, gcn_hidden_size)
        self.gcn3 = GCNLayer(gcn_hidden_size, gcn_hidden_size)

        self.fc1 = nn.Linear(gcn_hidden_size, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 2)
        self.sigmoid = nn.Sigmoid()
        self.dropout = nn.Dropout(p=dropout_prob)
        self.batch_norm1 = nn.BatchNorm1d(128)
        self.batch_norm2 = nn.BatchNorm1d(64)
        self.batch_norm3 = nn.BatchNorm1d(32)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        # GCN layers
        x = self.gcn1(x, edge_index)
        x = self.relu(x)
        x = self.gcn2(x, edge_index)
        x = self.relu(x)

        # MLP layers
        x = self.batch_norm1(self.fc1(x))
        x = self.relu(x)
        x = self.dropout(x)
        x = self.batch_norm2(self.fc2(x))
        x = self.relu(x)
        x = self.dropout(x)
        x = self.batch_norm3(self.fc3(x))
        x = self.relu(x)
        x = self.fc4(x)
        return x



def train_model_BC(model, train_loader, val_loader, optimizer, criterion, batch_size, epochs=10):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    train_losses = []
    val_losses = []
    train_accuracies = []
    val_accuracies = []
    val_tprs = []
    train_tprs = []
    base_path = '/content/drive/MyDrive/Colab Notebooks/Decomp/Models/BC'
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        start = 0
        train_epoch_loss = 0.0
        train_epoch_accuracy = 0.0
        train_epoch_tprs = 0.0
        total_length = len(train_loader) // batch_size
        for i in range(start, len(train_loader), batch_size):
            optimizer.zero_grad()
            scores = torch.empty(0, dtype=torch.float32, requires_grad=True).to(device)
            labels = torch.empty(0, dtype=torch.float32, requires_grad=True).to(device)
            for j in range(start, start + batch_size):
                if j >= len(train_loader):
                    break
                graph = train_loader[j]
                temp = model(graph)
                temp_softmax = F.softmax(temp, dim=1)
                temp2 = torch.argmax(temp_softmax, dim=1)
                scores = torch.cat([scores, temp2], dim=0)
                temp = graph.y
                labels = torch.cat([labels, temp], dim=0)
            start += batch_size
            scores = scores.squeeze().to(device)
            loss = criterion(scores, labels)
            train_epoch_loss += loss.item()

            # Calculate accuracy
            accuracy,tpr = calculate_accuracy(scores, labels)
            train_epoch_tprs += tpr
            train_epoch_accuracy += accuracy

            # Backward pass and optimization
            loss.backward()
            optimizer.step()
        train_tprs.append(train_epoch_tprs / total_length)
        average_train_loss = train_epoch_loss / total_length
        average_train_accuracy = train_epoch_accuracy / total_length
        train_losses.append(average_train_loss)
        train_accuracies.append(average_train_accuracy)

        # Validation
        model.eval()
        val_epoch_loss = 0.0
        val_epoch_accuracy = 0.0
        val_epoch_tpr = 0.0
        test_length = len(val_loader) // batch_size
        with torch.no_grad():
            start = 0
            for i in range(start, len(val_loader), batch_size):
                graph.to(device)
                optimizer.zero_grad()
                scores = torch.empty(0, dtype=torch.float32, requires_grad=True).to(device)
                labels = torch.empty(0, dtype=torch.float32, requires_grad=True).to(device)
                for j in range(start, start + batch_size):
                    if j >= len(val_loader):
                        break
                    graph = val_loader[j]
                    temp = model(graph)
                    temp_softmax = F.softmax(temp, dim=1)
                    temp2 = torch.argmax(temp_softmax, dim=1)
                    scores = torch.cat([scores, temp2], dim=0)
                    labels = torch.cat([labels, graph.y], dim=0)
                start += batch_size
                scores = scores.squeeze().to(device)
                loss = criterion(scores, labels)
                val_epoch_loss += loss.item()

                # Calculate accuracy
                accuracy, tpr = calculate_accuracy(scores, labels)
                val_epoch_accuracy += accuracy
                val_epoch_tpr += tpr


        val_tprs.append(val_epoch_tpr / test_length)
        average_val_loss = val_epoch_loss / test_length
        average_val_accuracy = val_epoch_accuracy / test_length
        val_losses.append(average_val_loss)
        val_accuracies.append(average_val_accuracy)
        save_path = f'{base_path}_{epoch+1}.pth'
        torch.save(model.state_dict(), save_path)

        # Print progress
        print(f'Epoch [{epoch+1}/{epochs}], '
              f'Train Loss: {average_train_loss:.4f}, Train Acc: {average_train_accuracy:.4f}, '
              f'Train TPR: {train_epoch_tprs / total_length:.4f}, '
              f'Val Loss: {average_val_loss:.4f}, Val Acc: {average_val_accuracy:.4f}, '
              f'Val TPR: {val_epoch_tpr / test_length:.4f}')
    # Plot the training and validation loss curves
    plt.plot(range(1, epochs + 1), train_losses, label='Training Loss')
    plt.plot(range(1, epochs + 1), val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

class FocalLoss(nn.Module):
    def __init__(self, alpha=0.9, gamma=2, logits=True, reduce=True):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.logits = logits
        self.reduce = reduce

    def forward(self, inputs, targets):
        if self.logits:
            BCE_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
        else:
            BCE_loss = F.binary_cross_entropy(inputs, targets, reduction='none')

        pt = torch.exp(-BCE_loss)
        focal_loss = self.alpha * (1 - pt) ** self.gamma * BCE_loss

        if self.reduce:
            return torch.mean(focal_loss)
        else:
            return focal_loss


In [None]:
batch_size = 2
epochs = 50
input_size = 9
gcn_hidden_size = 256
BC_model = BinaryClassificationModel(input_size, gcn_hidden_size)
criterion = FocalLoss(alpha=1, gamma=5)
optimizer = optim.Adam(BC_model.parameters(), lr=0.005)

train_model_BC(BC_model, train_graphs, val_graphs, optimizer, criterion, batch_size=batch_size, epochs=epochs)

# PairwiseRank

In [None]:
class PairwiseRankModel(nn.Module):
    def __init__(self, input_size, gcn_hidden_size, dropout_prob=0.3):
        super(PairwiseRankModel, self).__init__()
        # GCN layers
        self.gcn1 = GCNLayer(input_size, gcn_hidden_size)
        self.gcn2 = GCNLayer(gcn_hidden_size, gcn_hidden_size)
        self.gcn3 = GCNLayer(gcn_hidden_size, gcn_hidden_size)

        self.fc1 = nn.Linear(gcn_hidden_size, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 2)
        self.sigmoid = nn.Sigmoid()
        self.dropout = nn.Dropout(p=dropout_prob)
        self.batch_norm1 = nn.BatchNorm1d(128)
        self.batch_norm2 = nn.BatchNorm1d(64)
        self.batch_norm3 = nn.BatchNorm1d(32)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        # GCN layers
        x = self.gcn1(x, edge_index)
        x = self.relu(x)
        x = self.gcn2(x, edge_index)
        x = self.relu(x)

        # MLP layers
        x = self.batch_norm1(self.fc1(x))
        x = self.relu(x)
        x = self.dropout(x)
        x = self.batch_norm2(self.fc2(x))
        x = self.relu(x)
        x = self.dropout(x)
        x = self.batch_norm3(self.fc3(x))
        x = self.relu(x)
        x = self.fc4(x)
        return x


def train_model_pairwise(model, train_loader, val_loader, optimizer, criterion, batch_size, epochs=10):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    criterion = criterion.to(device)
    train_losses = []
    val_losses = []
    val_accuracies = []
    #base_path = '/content/drive/MyDrive/Colab Notebooks/Decomp/Models/PairwiseRank'
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        start = 0
        for i in range(start, len(train_loader), batch_size):
            optimizer.zero_grad()
            scores = torch.empty(0, dtype=torch.float32, requires_grad=True).to(device)
            labels = torch.empty(0, dtype=torch.float32, requires_grad=True).to(device)
            for j in range(start, start + batch_size):
                if j >= len(train_loader):
                    break
                graph = train_loader[j]
                temp = model(graph)
                temp_softmax = F.softmax(temp, dim=1)
                temp2 = torch.argmax(temp_softmax, dim=1)
                scores = torch.cat([scores, temp2], dim=0)
                temp = graph.y.to(device)
                labels = torch.cat([labels, temp], dim=0)
            start += batch_size
            # Create all possible pairs
            pairs_list = list(combinations(range(len(scores)), 2))
            # Calculate pairwise scores
            loss = 0
            pairwise_labels = torch.empty(0, dtype=torch.float32, requires_grad=True)
            pairwise_scores_list = []
            for pairs in pairs_list:
                if labels[pairs[0]] > labels[pairs[1]]:
                    pairwise_label = 1
                elif labels[pairs[0]] < labels[pairs[1]]:
                    pairwise_label = -1
                else:
                    pairwise_label = 0
                pairwise_scores_list.append([scores[pairs[0]], scores[pairs[1]]])
                pairwise_labels = torch.cat((pairwise_labels, torch.tensor([pairwise_label])))
            pairwise_labels = pairwise_labels.to(device)
            pairwise_scores = torch.tensor(pairwise_scores_list, dtype=torch.float32, requires_grad=True).to(device)
            print(pairwise_scores)
            loss = criterion(pairwise_scores[:, 0], pairwise_scores[:, 1], pairwise_labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        average_loss = running_loss / len(train_loader)
        train_losses.append(average_loss)
        print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {average_loss}")

    # Plot the training and validation loss curves
    plt.plot(range(1, epochs + 1), train_losses, label='Training Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

# REGRESSION

In [None]:
class RegressionModel(nn.Module):
    def __init__(self, input_size, gcn_hidden_size, dropout_prob=0.3):
        super(RegressionModel, self).__init__()
        # GCN layers
        self.gcn1 = GCNLayer(input_size, gcn_hidden_size)
        self.gcn2 = GCNLayer(gcn_hidden_size, gcn_hidden_size)
        self.gcn3 = GCNLayer(gcn_hidden_size, gcn_hidden_size)

        self.fc1 = nn.Linear(gcn_hidden_size, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()
        self.dropout = nn.Dropout(p=dropout_prob)
        self.batch_norm1 = nn.BatchNorm1d(128)
        self.batch_norm2 = nn.BatchNorm1d(64)
        self.batch_norm3 = nn.BatchNorm1d(32)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.gcn1(x, edge_index)
        x = self.relu(x)
        x = self.gcn2(x, edge_index)
        x = self.relu(x)

        # MLP layers
        x = self.batch_norm1(self.fc1(x))
        x = self.relu(x)
        x = self.dropout(x)
        x = self.batch_norm2(self.fc2(x))
        x = self.relu(x)
        x = self.dropout(x)
        x = self.batch_norm3(self.fc3(x))
        x = self.relu(x)
        x = self.fc4(x)
        x = self.sigmoid(x)
        return x


def train_model_regression(model, train_loader, val_loader, optimizer, criterion, batch_size, patience, epochs=10):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    train_losses = []
    val_losses = []
    base_path = '/content/drive/MyDrive/Colab Notebooks/Decomp/Models/regression'
    best_train_loss = float('inf')
    consecutive_no_improvement = 0
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        start = 0
        train_epoch_loss = 0.0
        total_length = len(train_loader) // batch_size


        for i in range(start, len(train_loader), batch_size):
            optimizer.zero_grad()
            scores = torch.empty(0, dtype=torch.float32, requires_grad=True).to(device)
            labels = torch.empty(0, dtype=torch.float32, requires_grad=True).to(device)
            for j in range(start, start + batch_size):
                if j >= len(train_loader):
                    break
                graph = train_loader[j]
                temp = model(graph)
                scores = torch.cat([scores, temp], dim=0)
                temp = graph.y
                temp = temp.to(device)
                labels = torch.cat([labels, temp], dim=0)
            start += batch_size
            scores = scores.squeeze().to(device)
            loss = criterion(scores, labels)
            train_epoch_loss += loss.item()


            # Backward pass and optimization
            loss.backward()
            optimizer.step()

        average_train_loss = train_epoch_loss / total_length
        train_losses.append(average_train_loss)


        # Print progress
        print(f'Epoch [{epoch+1}/{epochs}], '
              f'Train Loss: {average_train_loss:.4f}')
        if train_losses:
            if min(train_losses) < best_train_loss:
                best_train_loss = min(train_losses)
                consecutive_no_improvement = 0
            else:
                consecutive_no_improvement += 1
            if consecutive_no_improvement >= patience:
                print(f'Early stopping at epoch {epoch+1} due to no improvement in training loss.')
                break
    # Plot the training and validation loss curves
    plt.plot(range(1, epoch+2), train_losses, label='Training Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

# IMITATION EVALUATION

In [None]:
# function to evaluate imitation algo GCN VERSION
def model_imitation_evaluation(model, pygraph):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.eval()
    nxgraph = to_networkx(pygraph)
    best_node = None
    best_value = -9999
    normalize_function = max_normalize # [0,1] continuous
    normalize_param = 0.2
    dataset_normalize([pygraph], normalize_function=normalize_function, normalize_param=normalize_param)
    labels = pygraph.y
    outputs = model(pygraph)
    outputs = F.softmax(outputs, dim=1)
    temp = outputs.detach().cpu()
    temp = temp.numpy()
    outputs = temp[:, 1]
    best_value = -9999
    best_node = None
    for node, output_value in enumerate(outputs):
        #print(node)
        if output_value > best_value:
            best_value = output_value
            best_node = node
    output_label = labels[best_node]
    return output_label

In [None]:
# function to evaluate imitation algo GCN VERSION
def model_imitation_evaluation_pairwise(model, pygraph):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    pygraph.to(device)
    # save labels for eval
    normalize_function = max_normalize # [0,1] continuous
    normalize_param = 0.2
    dataset_normalize([pygraph], normalize_function=normalize_function, normalize_param=normalize_param)
    labels = pygraph.y
    # completely normalize now
    normalize_function = max_normalize_binary # [0,1] continuous
    normalize_param = 0.5
    dataset_normalize([pygraph], normalize_function=normalize_function, normalize_param=normalize_param)
    batch_size = 1
    epochs = 15
    input_size = 9
    gcn_hidden_size = 256
    criterion = nn.MarginRankingLoss(reduction='sum')
    optimizer = optim.Adam(model.parameters(), lr=0.005)
    # Train the model with validation
    train_model_pairwise(model, [pygraph], val_graphs, optimizer, criterion, batch_size=batch_size, epochs=epochs)
    model.eval()
    nxgraph = to_networkx(pygraph)
    best_node = None
    best_value = -9999
    outputs = model(pygraph)
    outputs = F.softmax(outputs, dim=1)
    temp = outputs.detach().cpu()
    temp = temp.numpy()
    outputs = temp[:, 1]
    best_value = -9999
    best_node = None
    for node, output_value in enumerate(outputs):
        #print(node)
        if output_value > best_value:
            best_value = output_value
            best_node = node
    output_label = labels[best_node]
    return output_label

In [None]:
# function to evaluate imitation algo GCN VERSION
def model_imitation_evaluation_regression(model, pygraph):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    pygraph.to(device)
    # save labels for eval
    normalize_function = max_normalize # [0,1] continuous
    normalize_param = 0.2
    dataset_normalize([pygraph], normalize_function=normalize_function, normalize_param=normalize_param)
    labels = pygraph.y
    batch_size = 1
    epochs = 2000
    input_size = 9
    gcn_hidden_size = 256
    patience = 100
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0001 )
    # Train the model with validation
    train_model_regression(model, [pygraph], val_graphs, optimizer, criterion, batch_size=batch_size, patience=patience, epochs=epochs)
    model.eval()
    nxgraph = to_networkx(pygraph)
    best_node = None
    best_value = -9999
    outputs = model(pygraph)
    outputs = outputs.detach().cpu()
    outputs = outputs.numpy()
    best_value = -9999
    best_node = None
    for node, output_value in enumerate(outputs):
        #print(node)
        if output_value > best_value:
            best_value = output_value
            best_node = node
    output_label = labels[best_node]
    return output_label

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from torch.utils.data import Subset, random_split
import numpy as np

# Load data and normalize the labels
loaded_dataset = torch.load('/content/drive/MyDrive/Colab Notebooks/Decomp/LabeledData/NetworkxGraphs/test_200nodes_100graphs_features.pt')

In [None]:
test_size = len(loaded_dataset)
bc_outputs = []
simple_rank_outputs = []
simple_rank_instance_outputs = []
pairwise_rank_instance_outputs = []
regression_instance_outputs = []
for i in tqdm(range(test_size)):
    print("GRAPH NUMBER:", i)
    graph = loaded_dataset[i]
    #bc_model = BinaryClassificationModel(9, 256)
    #bc_model.load_state_dict(torch.load('/content/drive/MyDrive/Colab Notebooks/Decomp/Models/Accepted/BC_16.pth', map_location=torch.device('cpu')))
    simple_rank_model = SimpleRankModel(9, 256)
    simple_rank_model.load_state_dict(torch.load('/content/drive/MyDrive/Colab Notebooks/Decomp/Models/Accepted/pairwiserank_14.pth', map_location=torch.device('cpu')))
    #pairwise_rank_model_instance = PairwiseRankModel(9, 256)
    #regression_model_instance = RegressionModel(9, 256)
    #bc_output = model_imitation_evaluation(bc_model, graph)
    simple_rank_output = model_imitation_evaluation(simple_rank_model, graph)
    #pairwise_rank_instance_output = model_imitation_evaluation_pairwise(pairwise_rank_model_instance, graph)
    #regression_output = model_imitation_evaluation_regression(regression_model_instance, graph)
    # save
    #bc_outputs.append(bc_output.item())
    simple_rank_outputs.append(simple_rank_output.item())
    #pairwise_rank_instance_outputs.append(pairwise_rank_instance_output.item())
    #regression_instance_outputs.append(regression_output.item())

print('\n')
print(bc_outputs)
print(simple_rank_outputs)
print(pairwise_rank_instance_outputs)
print(regression_instance_outputs)

print(np.mean(bc_outputs))
print(np.mean(simple_rank_outputs))
print(np.mean(pairwise_rank_instance_outputs))
print(np.mean(regression_instance_outputs))

In [1]:
bc_outputs = [0.7795275449752808, 0.8662420511245728, 0.8314606547355652, 1.0, 0.9417475461959839, 0.8320610523223877, 1.0, 0.013333333656191826, 0.019999999552965164, 0.8529411554336548, 0.7394366264343262, 0.7822580933570862, 1.0, 1.0, 0.7142857313156128, 0.8819444179534912, 0.7808219194412231, 0.0139860138297081, 0.7241379022598267, 0.013793103396892548, 0.8306451439857483, 1.0, 0.8644067645072937, 0.8880000114440918, 0.8164557218551636, 0.7405063509941101, 0.774193525314331, 0.8518518805503845, 0.7956204414367676, 0.012658228166401386, 0.8598726391792297, 0.014492753893136978, 0.904347836971283, 0.23333333432674408, 1.0, 0.5714285969734192, 1.0, 0.7553191781044006, 0.8300653696060181, 0.8203125, 0.7594936490058899, 1.0, 0.7640449404716492, 1.0, 0.01652892492711544, 0.7300000190734863, 1.0, 0.834782600402832, 0.8376068472862244, 1.0, 0.8113207817077637, 0.8229166865348816, 0.7816091775894165, 0.012820512987673283, 0.9541984796524048, 0.7654321193695068, 0.7888888716697693, 1.0, 0.841269850730896, 0.7472527623176575, 0.7572815418243408, 0.5833333134651184, 0.01315789483487606, 0.8217821717262268, 0.7586206793785095, 0.8558558821678162, 0.7945205569267273, 0.4000000059604645, 0.5, 0.012500000186264515, 0.8695651888847351, 0.8928571343421936, 0.013071895577013493, 1.0, 0.7254902124404907, 0.6893203854560852, 1.0, 0.8041236996650696, 1.0, 0.8661417365074158, 1.0, 0.8243243098258972, 0.8099173307418823, 0.9767441749572754, 1.0, 0.7868852615356445, 0.8272727131843567, 0.8692810535430908, 1.0, 0.013333333656191826, 1.0, 0.014492753893136978, 0.6516854166984558, 0.747474730014801, 0.834645688533783, 0.7964601516723633, 0.9509803652763367, 0.8214285969734192, 0.8559321761131287, 1.0]
simple_rank_outputs = [0.7244094610214233, 0.7324841022491455, 0.7415730357170105, 1.0, 0.7572815418243408, 0.8396946787834167, 1.0, 0.8600000143051147, 0.02500000037252903, 0.8235294222831726, 0.8380281925201416, 0.8064516186714172, 0.14499999582767487, 1.0, 0.8067227005958557, 0.8680555820465088, 0.8630136847496033, 0.8741258978843689, 0.7931034564971924, 0.013793103396892548, 0.7903226017951965, 1.0, 0.7627118825912476, 0.8960000276565552, 0.8481012582778931, 0.7594936490058899, 0.8387096524238586, 0.7222222089767456, 0.7299270033836365, 0.7848101258277893, 0.7898089289665222, 1.0, 0.8086956739425659, 0.03333333507180214, 1.0, 0.0714285746216774, 1.0, 0.7340425252914429, 0.8235294222831726, 0.875, 0.7721518874168396, 0.009999999776482582, 0.6853932738304138, 0.004999999888241291, 1.0, 0.8500000238418579, 1.0, 0.8782608509063721, 0.7606837749481201, 0.014999999664723873, 0.7735849022865295, 0.7916666865348816, 0.6551724076271057, 0.012820512987673283, 0.7633587718009949, 0.7777777910232544, 0.855555534362793, 1.0, 0.7777777910232544, 0.8241758346557617, 0.7961165308952332, 0.0833333358168602, 0.01315789483487606, 0.8712871074676514, 0.8390804529190063, 0.8198198080062866, 0.8630136847496033, 0.4000000059604645, 0.1666666716337204, 0.012500000186264515, 0.8695651888847351, 0.8095238208770752, 0.7843137383460999, 1.0, 0.7941176295280457, 0.708737850189209, 1.0, 0.6907216310501099, 1.0, 0.8897637724876404, 1.0, 0.837837815284729, 0.8181818127632141, 0.9224806427955627, 1.0, 0.8032786846160889, 0.800000011920929, 0.8039215803146362, 1.0, 0.8866666555404663, 1.0, 0.9637681245803833, 0.8202247023582458, 0.808080792427063, 0.9212598204612732, 0.8761062026023865, 0.7450980544090271, 0.7976190447807312, 0.8644067645072937, 1.0]
regression_outputs = [0.7952755689620972, 0.8152866363525391, 0.6966292262077332, 1.0, 0.7766990065574646, 0.847328245639801, 1.0, 0.9133333563804626, 0.02500000037252903, 0.656862735748291, 0.8239436745643616, 0.9112903475761414, 0.14499999582767487, 1.0, 0.9159663915634155, 0.013888888992369175, 0.7739726305007935, 0.0139860138297081, 0.9425287246704102, 0.013793103396892548, 0.9435483813285828, 1.0, 0.7627118825912476, 0.8799999952316284, 0.012658228166401386, 0.7784810066223145, 0.7311828136444092, 0.7129629850387573, 0.8321167826652527, 0.8860759735107422, 0.012738853693008423, 0.014492753893136978, 0.678260862827301, 0.03333333507180214, 1.0, 0.0714285746216774, 1.0, 0.7127659320831299, 0.843137264251709, 0.7890625, 0.8101266026496887, 0.009999999776482582, 0.7977527976036072, 0.004999999888241291, 0.01652892492711544, 0.7699999809265137, 1.0, 0.6695652008056641, 0.7606837749481201, 0.014999999664723873, 0.805031418800354, 0.84375, 0.931034505367279, 0.012820512987673283, 0.8549618124961853, 0.8765432238578796, 0.7222222089767456, 1.0, 0.8730158805847168, 0.6813187003135681, 1.0, 0.0833333358168602, 0.01315789483487606, 0.7623762488365173, 0.7701149582862854, 0.684684693813324, 0.8287671208381653, 0.20000000298023224, 0.1666666716337204, 0.012500000186264515, 0.739130437374115, 0.7857142686843872, 0.013071895577013493, 1.0, 0.7352941036224365, 0.6699029207229614, 1.0, 0.6391752362251282, 1.0, 0.8267716765403748, 1.0, 0.8716216087341309, 0.702479362487793, 0.8682170510292053, 1.0, 0.7950819730758667, 0.7454545497894287, 0.7516340017318726, 1.0, 0.800000011920929, 1.0, 0.014492753893136978, 0.7865168452262878, 0.6868686676025391, 0.7165354490280151, 0.6637167930603027, 0.8235294222831726, 0.8690476417541504, 0.6779661178588867, 1.0]
pairwise_rank_outputs = [0.7637795209884644, 0.7643312215805054, 0.7528089880943298, 1.0, 0.8834951519966125, 0.7557252049446106, 1.0, 0.013333333656191826, 1.0, 0.8235294222831726, 0.6971830725669861, 0.7983871102333069, 1.0, 1.0, 0.7815126180648804, 0.013888888992369175, 0.7397260069847107, 0.0139860138297081, 0.7701149582862854, 0.013793103396892548, 0.6693548560142517, 1.0, 0.8644067645072937, 0.7919999957084656, 0.012658228166401386, 0.6645569801330566, 0.774193525314331, 0.8333333134651184, 0.8029196858406067, 0.012658228166401386, 0.7579618096351624, 0.014492753893136978, 0.782608687877655, 0.2666666805744171, 1.0, 0.6428571343421936, 1.0, 0.7978723645210266, 0.7450980544090271, 0.78125, 0.753164529800415, 1.0, 0.7191011309623718, 1.0, 0.01652892492711544, 0.8500000238418579, 1.0, 0.7652173638343811, 0.7435897588729858, 1.0, 0.805031418800354, 0.84375, 0.7356321811676025, 0.012820512987673283, 0.7099236845970154, 0.8518518805503845, 0.7333333492279053, 1.0, 0.7698412537574768, 0.8461538553237915, 0.7766990065574646, 0.3333333432674408, 0.01315789483487606, 0.8613861203193665, 0.7931034564971924, 0.7207207083702087, 0.7602739930152893, 0.4000000059604645, 0.5, 0.012500000186264515, 0.7739130258560181, 0.8928571343421936, 0.758169949054718, 1.0, 0.8627451062202454, 0.8834951519966125, 1.0, 0.8556700944900513, 1.0, 0.6929134130477905, 1.0, 0.8040540814399719, 0.7933884263038635, 0.7054263353347778, 1.0, 0.7131147384643555, 0.7909091114997864, 0.013071895577013493, 1.0, 0.8199999928474426, 1.0, 0.014492753893136978, 0.6741573214530945, 0.868686854839325, 0.9055117964744568, 0.7876105904579163, 0.8921568393707275, 0.8095238208770752, 0.8389830589294434, 1.0]
pairwise_rank_instance_outputs = [0.7716535329818726, 0.7643312215805054, 0.8426966071128845, 1.0, 0.7766990065574646, 0.7557252049446106, 1.0, 0.9599999785423279, 1.0, 0.656862735748291, 0.8521126508712769, 0.725806474685669, 1.0, 1.0, 0.7815126180648804, 0.9166666865348816, 0.8082191944122314, 1.0, 0.7586206793785095, 0.013793103396892548, 0.6693548560142517, 1.0, 0.805084764957428, 0.8159999847412109, 0.012658228166401386, 0.8417721390724182, 0.8817204236984253, 0.8888888955116272, 0.8029196858406067, 0.012658228166401386, 0.8025477528572083, 0.014492753893136978, 0.7739130258560181, 0.2666666805744171, 1.0, 0.5714285969734192, 1.0, 0.7553191781044006, 0.7450980544090271, 0.890625, 0.8481012582778931, 1.0, 0.6966292262077332, 1.0, 0.01652892492711544, 0.8700000047683716, 1.0, 0.8782608509063721, 0.7606837749481201, 1.0, 0.805031418800354, 0.8020833134651184, 0.8390804529190063, 0.012820512987673283, 0.7404580116271973, 0.8518518805503845, 0.9555555582046509, 1.0, 0.738095223903656, 0.7692307829856873, 0.7572815418243408, 0.3333333432674408, 0.01315789483487606, 0.7029703259468079, 0.7701149582862854, 0.8558558821678162, 1.0, 0.6000000238418579, 0.5, 0.012500000186264515, 0.739130437374115, 0.988095223903656, 0.758169949054718, 1.0, 0.7843137383460999, 0.6699029207229614, 1.0, 0.8865979313850403, 1.0, 0.6929134130477905, 1.0, 0.7432432174682617, 0.702479362487793, 0.7054263353347778, 1.0, 0.7950819730758667, 0.7727272510528564, 0.8888888955116272, 1.0, 0.8199999928474426, 1.0, 0.9637681245803833, 0.6292135119438171, 0.6868686676025391, 0.834645688533783, 0.6637167930603027, 0.7450980544090271, 0.9642857313156128, 0.8305084705352783, 1.0]
regression_instance_outputs = [0.7795275449752808, 0.012738853693008423, 0.7865168452262878, 1.0, 0.8834951519966125, 0.8320610523223877, 1.0, 0.013333333656191826, 1.0, 0.8921568393707275, 1.0, 0.8145161271095276, 1.0, 1.0, 0.9495798349380493, 0.9166666865348816, 0.7534246444702148, 0.9510489702224731, 0.7471264600753784, 0.9379310607910156, 0.8387096524238586, 1.0, 0.7966101765632629, 0.8880000114440918, 0.9367088675498962, 0.7911392450332642, 1.0, 0.8148148059844971, 0.9416058659553528, 0.905063271522522, 0.012738853693008423, 1.0, 0.843478262424469, 0.3333333432674408, 1.0, 0.6428571343421936, 1.0, 0.8829787373542786, 0.7450980544090271, 0.9609375, 0.753164529800415, 1.0, 0.7528089880943298, 1.0, 1.0, 0.8999999761581421, 1.0, 0.895652174949646, 0.8461538553237915, 1.0, 0.805031418800354, 1.0, 0.6781609058380127, 1.0, 0.9160305261611938, 0.7407407164573669, 0.9444444179534912, 1.0, 0.9047619104385376, 0.9340659379959106, 0.7864077687263489, 0.3333333432674408, 0.01315789483487606, 0.7623762488365173, 0.8735632300376892, 0.7387387156486511, 0.8287671208381653, 0.4000000059604645, 0.5, 0.012500000186264515, 0.8695651888847351, 0.7976190447807312, 0.9411764740943909, 1.0, 0.8529411554336548, 0.708737850189209, 1.0, 0.7835051417350769, 1.0, 0.8503937125205994, 1.0, 0.8243243098258972, 0.93388432264328, 0.8914728760719299, 1.0, 0.9180327653884888, 0.8272727131843567, 0.8888888955116272, 1.0, 0.8866666555404663, 1.0, 0.9637681245803833, 0.8426966071128845, 0.7979797720909119, 0.9055117964744568, 0.9469026327133179, 0.8039215803146362, 0.7976190447807312, 0.7711864113807678, 1.0]



#0.72958134002983577 bc
#0.7406943323463202 simple rank
#0.6593492193240672 regression
#0.7127844956889748 pairwise
#0.7633054879307747 pairwise instance
#0.8272812394890934 regression instance

In [5]:
import matplotlib.pyplot as plt
import numpy as np

bc_outputs = [0.7795275449752808, 0.8662420511245728, 0.8314606547355652, 1.0, 0.9417475461959839, 0.8320610523223877, 1.0, 0.013333333656191826, 0.019999999552965164, 0.8529411554336548, 0.7394366264343262, 0.7822580933570862, 1.0, 1.0, 0.7142857313156128, 0.8819444179534912, 0.7808219194412231, 0.0139860138297081, 0.7241379022598267, 0.013793103396892548, 0.8306451439857483, 1.0, 0.8644067645072937, 0.8880000114440918, 0.8164557218551636, 0.7405063509941101, 0.774193525314331, 0.8518518805503845, 0.7956204414367676, 0.012658228166401386, 0.8598726391792297, 0.014492753893136978, 0.904347836971283, 0.23333333432674408, 1.0, 0.5714285969734192, 1.0, 0.7553191781044006, 0.8300653696060181, 0.8203125, 0.7594936490058899, 1.0, 0.7640449404716492, 1.0, 0.01652892492711544, 0.7300000190734863, 1.0, 0.834782600402832, 0.8376068472862244, 1.0, 0.8113207817077637, 0.8229166865348816, 0.7816091775894165, 0.012820512987673283, 0.9541984796524048, 0.7654321193695068, 0.7888888716697693, 1.0, 0.841269850730896, 0.7472527623176575, 0.7572815418243408, 0.5833333134651184, 0.01315789483487606, 0.8217821717262268, 0.7586206793785095, 0.8558558821678162, 0.7945205569267273, 0.4000000059604645, 0.5, 0.012500000186264515, 0.8695651888847351, 0.8928571343421936, 0.013071895577013493, 1.0, 0.7254902124404907, 0.6893203854560852, 1.0, 0.8041236996650696, 1.0, 0.8661417365074158, 1.0, 0.8243243098258972, 0.8099173307418823, 0.9767441749572754, 1.0, 0.7868852615356445, 0.8272727131843567, 0.8692810535430908, 1.0, 0.013333333656191826, 1.0, 0.014492753893136978, 0.6516854166984558, 0.747474730014801, 0.834645688533783, 0.7964601516723633, 0.9509803652763367, 0.8214285969734192, 0.8559321761131287, 1.0]
simple_rank_outputs = [0.7244094610214233, 0.7324841022491455, 0.7415730357170105, 1.0, 0.7572815418243408, 0.8396946787834167, 1.0, 0.8600000143051147, 0.02500000037252903, 0.8235294222831726, 0.8380281925201416, 0.8064516186714172, 0.14499999582767487, 1.0, 0.8067227005958557, 0.8680555820465088, 0.8630136847496033, 0.8741258978843689, 0.7931034564971924, 0.013793103396892548, 0.7903226017951965, 1.0, 0.7627118825912476, 0.8960000276565552, 0.8481012582778931, 0.7594936490058899, 0.8387096524238586, 0.7222222089767456, 0.7299270033836365, 0.7848101258277893, 0.7898089289665222, 1.0, 0.8086956739425659, 0.03333333507180214, 1.0, 0.0714285746216774, 1.0, 0.7340425252914429, 0.8235294222831726, 0.875, 0.7721518874168396, 0.009999999776482582, 0.6853932738304138, 0.004999999888241291, 1.0, 0.8500000238418579, 1.0, 0.8782608509063721, 0.7606837749481201, 0.014999999664723873, 0.7735849022865295, 0.7916666865348816, 0.6551724076271057, 0.012820512987673283, 0.7633587718009949, 0.7777777910232544, 0.855555534362793, 1.0, 0.7777777910232544, 0.8241758346557617, 0.7961165308952332, 0.0833333358168602, 0.01315789483487606, 0.8712871074676514, 0.8390804529190063, 0.8198198080062866, 0.8630136847496033, 0.4000000059604645, 0.1666666716337204, 0.012500000186264515, 0.8695651888847351, 0.8095238208770752, 0.7843137383460999, 1.0, 0.7941176295280457, 0.708737850189209, 1.0, 0.6907216310501099, 1.0, 0.8897637724876404, 1.0, 0.837837815284729, 0.8181818127632141, 0.9224806427955627, 1.0, 0.8032786846160889, 0.800000011920929, 0.8039215803146362, 1.0, 0.8866666555404663, 1.0, 0.9637681245803833, 0.8202247023582458, 0.808080792427063, 0.9212598204612732, 0.8761062026023865, 0.7450980544090271, 0.7976190447807312, 0.8644067645072937, 1.0]
regression_outputs = [0.7952755689620972, 0.8152866363525391, 0.6966292262077332, 1.0, 0.7766990065574646, 0.847328245639801, 1.0, 0.9133333563804626, 0.02500000037252903, 0.656862735748291, 0.8239436745643616, 0.9112903475761414, 0.14499999582767487, 1.0, 0.9159663915634155, 0.013888888992369175, 0.7739726305007935, 0.0139860138297081, 0.9425287246704102, 0.013793103396892548, 0.9435483813285828, 1.0, 0.7627118825912476, 0.8799999952316284, 0.012658228166401386, 0.7784810066223145, 0.7311828136444092, 0.7129629850387573, 0.8321167826652527, 0.8860759735107422, 0.012738853693008423, 0.014492753893136978, 0.678260862827301, 0.03333333507180214, 1.0, 0.0714285746216774, 1.0, 0.7127659320831299, 0.843137264251709, 0.7890625, 0.8101266026496887, 0.009999999776482582, 0.7977527976036072, 0.004999999888241291, 0.01652892492711544, 0.7699999809265137, 1.0, 0.6695652008056641, 0.7606837749481201, 0.014999999664723873, 0.805031418800354, 0.84375, 0.931034505367279, 0.012820512987673283, 0.8549618124961853, 0.8765432238578796, 0.7222222089767456, 1.0, 0.8730158805847168, 0.6813187003135681, 1.0, 0.0833333358168602, 0.01315789483487606, 0.7623762488365173, 0.7701149582862854, 0.684684693813324, 0.8287671208381653, 0.20000000298023224, 0.1666666716337204, 0.012500000186264515, 0.739130437374115, 0.7857142686843872, 0.013071895577013493, 1.0, 0.7352941036224365, 0.6699029207229614, 1.0, 0.6391752362251282, 1.0, 0.8267716765403748, 1.0, 0.8716216087341309, 0.702479362487793, 0.8682170510292053, 1.0, 0.7950819730758667, 0.7454545497894287, 0.7516340017318726, 1.0, 0.800000011920929, 1.0, 0.014492753893136978, 0.7865168452262878, 0.6868686676025391, 0.7165354490280151, 0.6637167930603027, 0.8235294222831726, 0.8690476417541504, 0.6779661178588867, 1.0]
pairwise_rank_outputs = [0.7637795209884644, 0.7643312215805054, 0.7528089880943298, 1.0, 0.8834951519966125, 0.7557252049446106, 1.0, 0.013333333656191826, 1.0, 0.8235294222831726, 0.6971830725669861, 0.7983871102333069, 1.0, 1.0, 0.7815126180648804, 0.013888888992369175, 0.7397260069847107, 0.0139860138297081, 0.7701149582862854, 0.013793103396892548, 0.6693548560142517, 1.0, 0.8644067645072937, 0.7919999957084656, 0.012658228166401386, 0.6645569801330566, 0.774193525314331, 0.8333333134651184, 0.8029196858406067, 0.012658228166401386, 0.7579618096351624, 0.014492753893136978, 0.782608687877655, 0.2666666805744171, 1.0, 0.6428571343421936, 1.0, 0.7978723645210266, 0.7450980544090271, 0.78125, 0.753164529800415, 1.0, 0.7191011309623718, 1.0, 0.01652892492711544, 0.8500000238418579, 1.0, 0.7652173638343811, 0.7435897588729858, 1.0, 0.805031418800354, 0.84375, 0.7356321811676025, 0.012820512987673283, 0.7099236845970154, 0.8518518805503845, 0.7333333492279053, 1.0, 0.7698412537574768, 0.8461538553237915, 0.7766990065574646, 0.3333333432674408, 0.01315789483487606, 0.8613861203193665, 0.7931034564971924, 0.7207207083702087, 0.7602739930152893, 0.4000000059604645, 0.5, 0.012500000186264515, 0.7739130258560181, 0.8928571343421936, 0.758169949054718, 1.0, 0.8627451062202454, 0.8834951519966125, 1.0, 0.8556700944900513, 1.0, 0.6929134130477905, 1.0, 0.8040540814399719, 0.7933884263038635, 0.7054263353347778, 1.0, 0.7131147384643555, 0.7909091114997864, 0.013071895577013493, 1.0, 0.8199999928474426, 1.0, 0.014492753893136978, 0.6741573214530945, 0.868686854839325, 0.9055117964744568, 0.7876105904579163, 0.8921568393707275, 0.8095238208770752, 0.8389830589294434, 1.0]
pairwise_rank_instance_outputs = [0.7716535329818726, 0.7643312215805054, 0.8426966071128845, 1.0, 0.7766990065574646, 0.7557252049446106, 1.0, 0.9599999785423279, 1.0, 0.656862735748291, 0.8521126508712769, 0.725806474685669, 1.0, 1.0, 0.7815126180648804, 0.9166666865348816, 0.8082191944122314, 1.0, 0.7586206793785095, 0.013793103396892548, 0.6693548560142517, 1.0, 0.805084764957428, 0.8159999847412109, 0.012658228166401386, 0.8417721390724182, 0.8817204236984253, 0.8888888955116272, 0.8029196858406067, 0.012658228166401386, 0.8025477528572083, 0.014492753893136978, 0.7739130258560181, 0.2666666805744171, 1.0, 0.5714285969734192, 1.0, 0.7553191781044006, 0.7450980544090271, 0.890625, 0.8481012582778931, 1.0, 0.6966292262077332, 1.0, 0.01652892492711544, 0.8700000047683716, 1.0, 0.8782608509063721, 0.7606837749481201, 1.0, 0.805031418800354, 0.8020833134651184, 0.8390804529190063, 0.012820512987673283, 0.7404580116271973, 0.8518518805503845, 0.9555555582046509, 1.0, 0.738095223903656, 0.7692307829856873, 0.7572815418243408, 0.3333333432674408, 0.01315789483487606, 0.7029703259468079, 0.7701149582862854, 0.8558558821678162, 1.0, 0.6000000238418579, 0.5, 0.012500000186264515, 0.739130437374115, 0.988095223903656, 0.758169949054718, 1.0, 0.7843137383460999, 0.6699029207229614, 1.0, 0.8865979313850403, 1.0, 0.6929134130477905, 1.0, 0.7432432174682617, 0.702479362487793, 0.7054263353347778, 1.0, 0.7950819730758667, 0.7727272510528564, 0.8888888955116272, 1.0, 0.8199999928474426, 1.0, 0.9637681245803833, 0.6292135119438171, 0.6868686676025391, 0.834645688533783, 0.6637167930603027, 0.7450980544090271, 0.9642857313156128, 0.8305084705352783, 1.0]
regression_instance_outputs = [0.7795275449752808, 0.012738853693008423, 0.7865168452262878, 1.0, 0.8834951519966125, 0.8320610523223877, 1.0, 0.013333333656191826, 1.0, 0.8921568393707275, 1.0, 0.8145161271095276, 1.0, 1.0, 0.9495798349380493, 0.9166666865348816, 0.7534246444702148, 0.9510489702224731, 0.7471264600753784, 0.9379310607910156, 0.8387096524238586, 1.0, 0.7966101765632629, 0.8880000114440918, 0.9367088675498962, 0.7911392450332642, 1.0, 0.8148148059844971, 0.9416058659553528, 0.905063271522522, 0.012738853693008423, 1.0, 0.843478262424469, 0.3333333432674408, 1.0, 0.6428571343421936, 1.0, 0.8829787373542786, 0.7450980544090271, 0.9609375, 0.753164529800415, 1.0, 0.7528089880943298, 1.0, 1.0, 0.8999999761581421, 1.0, 0.895652174949646, 0.8461538553237915, 1.0, 0.805031418800354, 1.0, 0.6781609058380127, 1.0, 0.9160305261611938, 0.7407407164573669, 0.9444444179534912, 1.0, 0.9047619104385376, 0.9340659379959106, 0.7864077687263489, 0.3333333432674408, 0.01315789483487606, 0.7623762488365173, 0.8735632300376892, 0.7387387156486511, 0.8287671208381653, 0.4000000059604645, 0.5, 0.012500000186264515, 0.8695651888847351, 0.7976190447807312, 0.9411764740943909, 1.0, 0.8529411554336548, 0.708737850189209, 1.0, 0.7835051417350769, 1.0, 0.8503937125205994, 1.0, 0.8243243098258972, 0.93388432264328, 0.8914728760719299, 1.0, 0.9180327653884888, 0.8272727131843567, 0.8888888955116272, 1.0, 0.8866666555404663, 1.0, 0.9637681245803833, 0.8426966071128845, 0.7979797720909119, 0.9055117964744568, 0.9469026327133179, 0.8039215803146362, 0.7976190447807312, 0.7711864113807678, 1.0]

In [None]:
import matplotlib.pyplot as plt
import numpy as np

bins = np.linspace(0, 1, 11)

# Calculate histograms
hist_bc, _ = np.histogram(bc_outputs, bins=bins)
hist_simple_rank, _ = np.histogram(simple_rank_outputs, bins=bins)
hist_pairwise_rank, _ = np.histogram(pairwise_rank_outputs, bins=bins)
hist_regression, _ = np.histogram(regression_outputs, bins=bins)
hist_regression_instance, _ = np.histogram(regression_instance_outputs, bins=bins)

# Calculate bin centers
bin_centers = (bins[:-1] + bins[1:]) / 2

# Plot line graphs
plt.plot(bin_centers, hist_bc, label='Binary Classification')
plt.plot(bin_centers, hist_simple_rank, label='SimpleRank')
plt.plot(bin_centers, hist_pairwise_rank, label='PairwiseRank')
plt.plot(bin_centers, hist_regression, label='Regression')
plt.plot(bin_centers, hist_regression_instance, label='Regression Instance')

# Extend x-axis to include 1
plt.xlim(0, 1)

# Add labels and title
plt.xlabel('Actual Expert Policy Labels of Selected Nodes')
plt.ylabel('Frequency (Percentage)')
plt.title('Performance Distribution of Imitation Learning Models')

# Add legend
plt.legend()

# Show the plot
plt.show()
