# **Problem 1.**
## **1. Erdos-Renyi graph**


In [6]:
import networkx as nx
import matplotlib.pyplot as plt
import random as rand
import multiprocessing
from tqdm import tqdm

In [13]:
class Graph:
    def __init__(self):
        self.G = nx.Graph()
        self.nodes = {}
        self.edges = {}

    def compute_diameter(self):
        return nx.diameter(self.G)

    def add_node(self, node):
        if node not in self.nodes:
            self.nodes[node] = set()

    def add_edge(self, node1, node2):
        if node1 in self.nodes and node2 in self.nodes:
            self.edges[(node1, node2)] = 1
            self.edges[(node2, node1)] = 1
            self.nodes[node1].add(node2)
            self.nodes[node2].add(node1)

    '''
    The easiest property to study is the degree distribution. A given node v is incident with
    n − 1 potential edges, and each of them exists with probability p independently of each other.
    '''
    '''
    in Section 3.2 we mentioned
    that in social networks we observe heavy-tailed degree distributions. This shows one of the
    main problems of the Erd˝os-R´enyi random graph model for modeling social networks. In other
    words in the Erd˝os-R´enyi model the nodes are very similar
    '''


    def compute_degree_distribution(self):
        degree_counts = {}
        for node in self.nodes:
            degree = len(self.nodes[node])
            degree_counts[degree] = degree_counts.get(degree, 0) + 1
        return degree_counts


    def compute_diameter(self):
        # Use breadth-first search to find the maximum shortest path length
        max_path_length = 0
        for node in self.nodes:
            queue = [(node, 0)]
            visited = set([node])

            while queue:
                current_node, path_length = queue.pop(0)
                max_path_length = max(max_path_length, path_length)

                for neighbor in self.nodes[current_node]:
                    if neighbor not in visited:
                        visited.add(neighbor)
                        queue.append((neighbor, path_length + 1))

        return max_path_length
    def compute_clustering_coefficient(self):
        return nx.average_clustering(self.G)

    def plot_degree_distribution(self):
        degree_dist = self.compute_degree_distribution()
        degrees = list(degree_dist.keys())
        frequencies = list(degree_dist.values())

        plt.bar(degrees, frequencies)
        plt.xlabel("Degree")
        plt.ylabel("Frequency")
        plt.title("Degree Distribution")
        plt.show()

        plt.loglog(degrees, frequencies, 'bo', markersize=5)
        plt.xlabel("Degree")
        plt.ylabel("Frequency")
        plt.title("degree Distribution (logarithmic Scale)")
        plt.show()

    def plot_graph(self):
        G = nx.Graph()
        G.add_nodes_from(self.nodes.keys())
        G.add_edges_from(self.edges.keys())

        pos = nx.spring_layout(G)
        nx.draw(G, pos, with_labels=True)
        plt.show()

    def compute_average_path_length(self):
        return nx.average_shortest_path_length(self.G)


class ErdosRenyiGraph(Graph):
    def generate_er_graph(self, n, p):
        self.nodes = {}
        self.edges = {}

        self.add_node(0)  # Add the first node

        for i in range(1, n):
            self.add_node(i)  # Add nodes from 1 to n-1

            for j in range(i):
                if rand.random() < p:  # Add an edge with probability p
                    self.add_edge(i, j)

class WattsStrogatzGraph(Graph):
    def generate_ws_graph(self, n, k, beta):
        self.nodes = {}
        self.edges = {}

        for node in range(n):
            self.add_node(node)

        # Create initial ring lattice
        for node in self.nodes:
            for i in range(1, k // 2 + 1):
                neighbor = (node + i) % n
                self.add_edge(node, neighbor)

        # Rewire edges
        for node in self.nodes:
            for i in range(1, k // 2 + 1):
                if rand.random() < beta:
                    neighbor = (node + i) % n
                    self.rewire_edge(node, neighbor)

    def rewire_edge(self, node, neighbor):
        # Remove existing edge
        self.remove_edge(node, neighbor)

        # Select a random node to connect
        new_neighbor = rand.choice(list(self.nodes.keys()))

        # Make sure the new neighbor is not the same as the original node or an existing neighbor
        while new_neighbor == node or new_neighbor == neighbor or new_neighbor in self.nodes[node]:
            new_neighbor = rand.choice(list(self.nodes.keys()))

        # Add the new edge
        self.add_edge(node, new_neighbor)

    def remove_edge(self, node1, node2):
        if (node1, node2) in self.edges:
            del self.edges[(node1, node2)]
            del self.edges[(node2, node1)]
            self.nodes[node1].remove(node2)
            self.nodes[node2].remove(node1)


class BarabasiAlbertGraph(Graph):
    def generate_ba_graph(self, n, l):
        self.nodes = {}
        self.edges = {}

        self.add_node(0)  # Add the first node

        for i in range(1, n):
            self.add_node(i)  # Add nodes from 1 to n-1

            # Connect the new node to l existing nodes
            selected_nodes = self.select_nodes_to_connect(l)
            for node in selected_nodes:
                self.add_edge(i, node)

    def select_nodes_to_connect(self, l):
        # Calculate the total degree of the graph
        total_degree = sum([len(neighbors) for neighbors in self.nodes.values()])

        if total_degree == 0:
            # If total_degree is zero, assign equal probabilities to all nodes
            probabilities = [1 / len(self.nodes)] * len(self.nodes)
        else:
            # Calculate the probability for each node to be selected
            probabilities = [len(neighbors) / total_degree for neighbors in self.nodes.values()]

        # Select l nodes based on the probabilities
        selected_nodes = rand.choices(list(self.nodes.keys()), weights=probabilities, k=l)
        return selected_nodes

    def compute_correlation_coefficient(self):
        if len(self.G) == 0 or len(self.G.edges()) == 0:
            return None  # Return None if the graph is empty

        return nx.degree_assortativity_coefficient(self.G)


In [114]:
graph = ErdosRenyiGraph()
graph.generate_er_graph(10000, 0.01)   # Generate an Erdos-Renyi graph with 1000 nodes and edge probability 0.01 degree
#graph.compute_degree_distribution()
#graph.plot_degree_distribution()

print("Correlation Coefficient:", graph.compute_correlation_coefficient())
connectedness = graph.compute_connectedness()
print("Connectedness", connectedness)# Compute and print the diameter
#diameter = graph.compute_diameter()
#print("ER Diameter:", diameter)
#
## Compute and print the clustering coefficient
#clustering_coefficient = graph.compute_clustering_coefficient()
#print("ER Clustering Coefficient:", clustering_coefficient)



Correlation Coefficient: 0.000979687525672006
Connectedness True


In [116]:
graph = ErdosRenyiGraph()
graph.generate_er_graph(10000, 0.11)   # Generate an Erdos-Renyi graph with 1000 nodes and edge probability 0.01 degree
#graph.compute_degree_distribution()
#graph.plot_degree_distribution()
print("Correlation Coefficient:", graph.compute_correlation_coefficient())
connectedness = graph.compute_connectedness()
print("Connectedness", connectedness)
# Compute and print the diameter
#diameter = graph.compute_diameter()
#print("ER Diameter:", diameter)
#
## Compute and print the clustering coefficient
#clustering_coefficient = graph.compute_clustering_coefficient()
#print("ER Clustering Coefficient:", clustering_coefficient)

Correlation Coefficient: -0.0010902031856076964
Connectedness True


In [None]:
graph = ErdosRenyiGraph()
graph.generate_er_graph(10000, 0.3)   # Generate an Erdos-Renyi graph with 1000 nodes and edge probability 0.01 degree
#graph.compute_degree_distribution()
#graph.plot_degree_distribution()
#print("Correlation Coefficient:", graph.compute_correlation_coefficient())
#connectedness = graph.compute_connectedness()
#print("Connectedness", connectedness)
## Compute and print the diameter
diameter = graph.compute_diameter()
#print("ER Diameter:", diameter)
#
## Compute and print the clustering coefficient
clustering_coefficient = graph.compute_clustering_coefficient()
#print("ER Clustering Coefficient:", clustering_coefficient)

In [118]:
graph = BarabasiAlbertGraph()
graph.generate_ba_graph(10000, 5)   # Generate an
#graph.compute_degree_distribution()
#graph.plot_degree_distribution()
#print("Correlation Coefficient:", graph.compute_correlation_coefficient())
#connectedness = graph.compute_connectedness()
#print("Connectedness", connectedness)
## Compute and print the diameter
diameter = graph.compute_diameter()
print("BA Diameter:", diameter)
#
## Compute and print the clustering coefficient
clustering_coefficient = graph.compute_clustering_coefficient()
print("BA Clustering Coefficient:", clustering_coefficient)
#print("Connectedness:", graph.compute_connectedness())
#print("Average Path Length:", graph.compute_average_path_length())
#


Correlation Coefficient: -0.025160322086971214
Connectedness True


In [None]:
graph = BarabasiAlbertGraph()
graph.generate_ba_graph(20000, 5)   # Generate an
#graph.compute_degree_distribution()
#graph.plot_degree_distribution()
#print("Correlation Coefficient:", graph.compute_correlation_coefficient())
#connectedness = graph.compute_connectedness()
#print("Connectedness", connectedness)
## Compute and print the diameter
diameter = graph.compute_diameter()
print("BA Diameter:", diameter)
#
## Compute and print the clustering coefficient
clustering_coefficient = graph.compute_clustering_coefficient()
print("BA Clustering Coefficient:", clustering_coefficient)
#print("Connectedness:", graph.compute_connectedness())
#print("Average Path Length:", graph.compute_average_path_length())
#


In [120]:
graph = BarabasiAlbertGraph()
graph.generate_ba_graph(20000, 50)   # Generate an
#graph.compute_degree_distribution()
#graph.plot_degree_distribution()
print("Correlation Coefficient:", graph.compute_correlation_coefficient())
connectedness = graph.compute_connectedness()
print("Connectedness", connectedness)
## Compute and print the diameter
#diameter = graph.compute_diameter()
#print("BA Diameter:", diameter)
#
## Compute and print the clustering coefficient
#clustering_coefficient = graph.compute_clustering_coefficient()
#print("BA Clustering Coefficient:", clustering_coefficient)
#
#

Correlation Coefficient: 0.0014969868261041667
Connectedness True


In [121]:
graph = BarabasiAlbertGraph()
graph.generate_ba_graph(20000, 100)   # Generate an
#graph.compute_degree_distribution()
#graph.plot_degree_distribution()
print("Correlation Coefficient:", graph.compute_correlation_coefficient())
connectedness = graph.compute_connectedness()
print("Connectedness", connectedness)
## Compute and print the diameter
#diameter = graph.compute_diameter()
#print("BA Diameter:", diameter)
#
## Compute and print the clustering coefficient
#clustering_coefficient = graph.compute_clustering_coefficient()
#print("BA Clustering Coefficient:", clustering_coefficient)
#
#

Correlation Coefficient: 0.005468771475428923
Connectedness True


In [122]:
graph = WattsStrogatzGraph()
graph.generate_ws_graph(10000,400, 0.01)
#graph.compute_degree_distribution()
#graph.plot_degree_distribution()
#
## Compute and print the diameter
#diameter = graph.compute_diameter()
#print("WS Diameter:", diameter)
#
## Compute and print the clustering coefficient
#clustering_coefficient = graph.compute_clustering_coefficient()
#print("WS Clustering Coefficient:", clustering_coefficient)
#
##graph.plot_graph()
print("Correlation Coefficient:", graph.compute_correlation_coefficient())
connectedness = graph.compute_connectedness()
print("Connectedness", connectedness)

Correlation Coefficient: -0.0011438354389191228
Connectedness True


In [123]:
graph = WattsStrogatzGraph()
graph.generate_ws_graph(20000,200, 0.01)
#graph.compute_degree_distribution()
#graph.plot_degree_distribution()
#
## Compute and print the diameter
#diameter = graph.compute_diameter()
#print("WS Diameter:", diameter)
#
## Compute and print the clustering coefficient
#clustering_coefficient = graph.compute_clustering_coefficient()
#print("WS Clustering Coefficient:", clustering_coefficient)
#
##graph.plot_graph()
print("Correlation Coefficient:", graph.compute_correlation_coefficient())
connectedness = graph.compute_connectedness()
print("Connectedness", connectedness)
#print("Average Path Length:", graph.compute_average_path_length())

Correlation Coefficient: -0.0015213639799704947
Connectedness True


In [124]:
graph = WattsStrogatzGraph()
graph.generate_ws_graph(20000,200, 0.11)
#graph.compute_degree_distribution()
#graph.plot_degree_distribution()
#
## Compute and print the diameter
#diameter = graph.compute_diameter()
#print("WS Diameter:", diameter)
#
## Compute and print the clustering coefficient
#clustering_coefficient = graph.compute_clustering_coefficient()
#print("WS Clustering Coefficient:", clustering_coefficient)
##graph.plot_graph()
print("Correlation Coefficient:", graph.compute_correlation_coefficient())
connectedness = graph.compute_connectedness()
print("Connectedness", connectedness)

Correlation Coefficient: 0.000443634502790464
Connectedness True


In [125]:
graph = WattsStrogatzGraph()
graph.generate_ws_graph(20000,400, 0.1)
#graph.compute_degree_distribution()
#graph.plot_degree_distribution()
#
## Compute and print the diameter
#diameter = graph.compute_diameter()
#print("WS Diameter:", diameter)
#
## Compute and print the clustering coefficient
#clustering_coefficient = graph.compute_clustering_coefficient()
#print("WS Clustering Coefficient:", clustering_coefficient)
#
##graph.plot_graph()
print("Correlation Coefficient:", graph.compute_correlation_coefficient())
connectedness = graph.compute_connectedness()
print("Connectedness", connectedness)

Correlation Coefficient: -0.0008094265308064043
Connectedness True


In [126]:
graph = WattsStrogatzGraph()
graph.generate_ws_graph(20000,400, 0.2)
#graph.compute_degree_distribution()
#graph.plot_degree_distribution()
#
## Compute and print the diameter
#diameter = graph.compute_diameter()
#print("WS Diameter:", diameter)
#
## Compute and print the clustering coefficient
#clustering_coefficient = graph.compute_clustering_coefficient()
#print("WS Clustering Coefficient:", clustering_coefficient)
#print("Connectedness:", graph.compute_connectedness())
#print("Average Path Length:", graph.compute_average_path_length())
##graph.plot_graph()
print("Correlation Coefficient:", graph.compute_correlation_coefficient())
connectedness = graph.compute_connectedness()
print("Connectedness", connectedness)

Correlation Coefficient: -0.0011990267246720309
Connectedness True


In [127]:
graph = WattsStrogatzGraph()
graph.generate_ws_graph(20000,800, 0.2)
#graph.compute_degree_distribution()
#graph.plot_degree_distribution()
#
## Compute and print the diameter
#diameter = graph.compute_diameter()
#print("WS Diameter:", diameter)
#
## Compute and print the clustering coefficient
#clustering_coefficient = graph.compute_clustering_coefficient()
#print("WS Clustering Coefficient:", clustering_coefficient)
#print("Connectedness:", graph.compute_connectedness())
#print("Average Path Length:", graph.compute_average_path_length())
##graph.plot_graph(
print("Correlation Coefficient:", graph.compute_correlation_coefficient())
connectedness = graph.compute_connectedness()
print("Connectedness", connectedness)

Correlation Coefficient: -0.00030606988216486537
Connectedness True


In [None]:
graph = WattsStrogatzGraph()
graph.generate_ws_graph(20000,800, 0.5)
#graph.compute_degree_distribution()
#graph.plot_degree_distribution()
#
## Compute and print the diameter
#diameter = graph.compute_diameter()
#print("WS Diameter:", diameter)
#
## ompute and print the clustering coefficient
#clustering_coefficient = graph.compute_clustering_coefficient()
#print("WS Clustering Coefficient:", clustering_coefficient)
#print("Connectedness:", graph.compute_connectedness())
#print("Average Path Length:", graph.compute_average_path_length())
##graph.plot_graph()
print("Correlation Coefficient:", graph.compute_correlation_coefficient())
connectedness = graph.compute_connectedness()
print("Connectedness", connectedness)

# Problem 2

In [None]:
import networkx as nx
import matplotlib.pyplot as plt


class Node:
    def __init__(self, value):
        self.value = value
        self.next = None


def construct_circle_graph(n):
    # Create n nodes
    nodes = [Node(i) for i in range(1, n+1)]

    # Connect the nodes in a circle
    for i in range(n):
        nodes[i].next = nodes[(i+1) % n]

    return nodes


# Test the function
n = int(input("Enter the number of nodes: "))
circle_graph = construct_circle_graph(n)

# Create a directed graph from the circle graph
G = nx.DiGraph()
for node in circle_graph:
    G.add_edge(node.value, node.next.value, weight=1)

# Draw the graph
pos = nx.circular_layout(G)
nx.draw(G, pos, with_labels=True, node_size=800, node_color='lightblue', edge_color='gray', arrows=True)
edge_labels = nx.get_edge_attributes(G, 'weight')
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
plt.title("Directed Circle Graph")
plt.axis('equal')
plt.show()


# Problem 5

Files:

nodeId.edges : The edges in the ego network for the node 'nodeId'. Edges are undirected. The 'ego' node does not appear, but it is assumed that they follow every node id that appears in this file.

nodeId.circles : The set of circles for the ego node. Each line contains one circle, consisting of a series of node ids. The first entry in each line is the name of the circle.

nodeId.feat : The features for each of the nodes that appears in the edge file.

nodeId.egofeat : The features for the ego user.

nodeId.featnames : The names of each of the feature dimensions. Features are '1' if the user has this property in their profile, and '0' otherwise. This file has been anonymized for facebook users, since the names of the features would reveal private data.

In [5]:
import pandas as pd
import numpy as np
import codecs
import csv


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Data Preprocessing

In [35]:
import os
import numpy as np

data_dir = '/content/drive/MyDrive/Social Networks Homework_Vano Mazashvili/facebook'
node_ids = ['0', '107', '348', '414', '686', '698', '1684', '1912', '3437', '3980']

def process_feat_files(data_dir, node_id):
    feat_file_path = os.path.join(data_dir, f'{node_id}.feat')
    featname_file_path = os.path.join(data_dir, f'{node_id}.featnames')

    with open(feat_file_path, 'r') as feat_file, open(featname_file_path, 'r') as featname_file:
        featnames = featname_file.read().strip().split('\n')
        arrays = []

        for line in feat_file:
            line = line.strip()
            if line:
                feat = list(map(int, line.split()[1:]))
                selected_featnames = [featnames[i-1].split()[-1] for i, val in enumerate(feat, start=1) if val == 1]

                new_array = np.zeros(1283, dtype=int)
                for featname in selected_featnames:
                    new_array[int(featname)] = 1

                arrays.append(new_array)

        arrays = np.array(arrays)
        return arrays


edge_files= [f for f in os.listdir(data_dir) if f.endswith('.edges')]

edge_file_path = os.path.join(data_dir, f'{node_ids}.edges')

edge_files = [f for f in os.listdir(data_dir) if f.endswith('.edges')]

edges = []
for edge_file in edge_files:
    with open(os.path.join(data_dir, edge_file), 'r') as file:
        for line in file:
            edge = line.strip().split()
            edge = list(map(int, edge))  # Convert edges to integers
            edges.append(edge)

unique_nodes = set()
for edge in edges:
    unique_nodes.add(edge[0])
    unique_nodes.add(edge[1])



features = None

for node_id in node_ids:
    result = process_feat_files(data_dir, node_id)
    if features is None:
        features = result
    else:
        features = np.concatenate((features, result), axis=0)

circles_files = [f for f in os.listdir(data_dir) if f.endswith('.circles')]
l = {}
for circles_file in circles_files:
    with open(os.path.join(data_dir, circles_file), 'r') as file:
        for line in file:
            circle = line.strip().split()
            circle_name = circle[0]
            circle_nodes = circle[1:]

            if circle_name in l:
                l[circle_name].extend(circle_nodes)
            else:
                l[circle_name] = circle_nodes

# Assign labels to nodes
node_ids = set()
for node_list in l.values():
    node_ids.update(node_list)

max_label_index = len(l) - 1

label_vectors = []
for node_id in range(1, 4168):  # Assuming 4167 nodes in total
    label_vector = [0] * (max_label_index + 1)
    for label_index, node_list in enumerate(l.values()):
        if str(node_id) in node_list:
            label_vector[label_index] = 1
    label_vectors.append(label_vector)

labels = []
labels = np.sum(label_vectors, axis=1)

num_nodes = len(unique_nodes)
lb = np.zeros((num_nodes, 46), dtype=int)

for node_id, node_list in enumerate(l.values()):
    for circle_name, circle_nodes in l.items():
        circle_index = int(circle_name.replace("circle", "")) - 1
        if str(node_id + 1) in circle_nodes:
            lb[node_id][circle_index] = 1


#print(len(labels))
#print(max(labels))
#print(labels)
#print(features)
#print(features[345][2])
#print(len(features))
print(len(edges))
print(len(features))
print(l)
print(labels)
print(len(labels))
num_unique_nodes = len(unique_nodes)
print(f"Number of unique nodes: {num_unique_nodes}")

170174
4167
{'circle0': ['71', '215', '54', '61', '298', '229', '81', '253', '193', '97', '264', '29', '132', '110', '163', '259', '183', '334', '245', '222', '475', '373', '461', '391', '376', '524', '348', '436', '513', '475', '388', '373', '431', '463', '461', '517', '400', '391', '450', '500', '452', '366', '414', '408', '525', '524', '387', '376', '1043', '1045', '1030', '1252', '1254', '1368', '1197', '955', '1111', '1384', '828', '830', '708', '713', '840', '803', '747', '774', '823', '745', '856', '697', '719', '783', '717', '837', '843', '797', '728', '831', '803', '823', '705', '763', '820', '748', '824', '745', '856', '697', '804', '730', '709', '849', '719', '737', '755', '817', '836', '765', '716', '769', '782', '747', '774', '770', '718', '841', '792', '720', '781', '808', '838', '764', '819', '807', '708', '840', '698', '852', '780', '814', '809', '687', '828', '830', '754', '759', '739', '827', '815', '700', '758', '787', '798', '779', '835', '703', '790', '741', '793',

In [39]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from torch.nn import Linear

# Convert the edges list to a PyTorch Geometric data object
edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()

# Convert the features and labels to PyTorch tensors
x = torch.tensor(features, dtype=torch.float)
y = torch.tensor(labels, dtype=torch.long)


# Create a PyTorch Geometric data object
data = Data(x=x, edge_index=edge_index, y=y)

# Define the GNN model
class GNNModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GNNModel, self).__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, hidden_dim)
        self.conv3 = GCNConv(hidden_dim, hidden_dim)
        self.conv4 = GCNConv(hidden_dim, hidden_dim)
        self.conv5 = GCNConv(hidden_dim, hidden_dim)
        self.out = Linear(hidden_dim, output_dim)


    def forward(self, x, edge_index):

        # First Message Passing Layer (Transformation)
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = F.dropout(x, p=0.5, training=self.training)

        # Second Message Passing Layer
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = F.dropout(x, p=0.5, training=self.training)

        x = self.conv3(x, edge_index)
        x = x.relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv4(x, edge_index)
        x = x.relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv5(x, edge_index)
        x = x.relu()
        x = F.dropout(x, p=0.5, training=self.training)
        # Output layer
        x = F.softmax(self.out(x), dim=1)
        return x

# Set the dimensions for the GNN model
input_dim = x.shape[1]
hidden_dim = 128
output_dim = len(l)

# Create an instance of the GNN model
model = GNNModel(input_dim, hidden_dim, output_dim)

# Define the loss function and optimizerfrom torch.nn import Linear
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the GNN model
def train(model, data, optimizer, criterion, num_epochs):
    model.train()

    for epoch in range(num_epochs):
        optimizer.zero_grad()
        out = model(data.x, data.edge_index)
        loss = criterion(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()

        # Perform evaluation on the validation set
        model.eval()
        with torch.no_grad():
            logits = model(data.x, data.edge_index)
            pred = logits.argmax(dim=1)
            correct = pred[data.val_mask] == data.y[data.val_mask]
            val_acc = int(correct.sum()) / int(data.val_mask.sum())

        print(f'Epoch: {epoch+1}, Loss: {loss.item()}, Val Acc: {val_acc}')

# Split the data into training, validation, and testing masks
num_nodes = len(data.y)
train_mask = torch.zeros(num_nodes, dtype=torch.bool)
val_mask = torch.zeros(num_nodes, dtype=torch.bool)
test_mask = torch.zeros(num_nodes, dtype=torch.bool)

# Assuming 70% training, 15% validation, and 15% testing split
train_mask[:int(num_nodes*0.7)] = True
val_mask[int(num_nodes*0.7):int(num_nodes*0.85)] = True
test_mask[int(num_nodes*0.85):] = True

data.train_mask = train_mask
data.val_mask = val_mask
data.test_mask = test_mask

# Train the model
num_epochs = 100
train(model, data, optimizer, criterion, num_epochs)

# Evaluate the model on the testing set
model.eval()
with torch.no_grad():
    logits = model(data.x, data.edge_index)
    pred = logits.argmax(dim=1)
    correct = pred[data.test_mask] == data.y[data.test_mask]
    test_acc = int(correct.sum()) / int(data.test_mask.sum())

print(f'Test Acc: {test_acc}')


Epoch: 1, Loss: 3.827887773513794, Val Acc: 0.8128
Epoch: 2, Loss: 3.8275399208068848, Val Acc: 0.8128
Epoch: 3, Loss: 3.827157735824585, Val Acc: 0.8128
Epoch: 4, Loss: 3.826672315597534, Val Acc: 0.8128
Epoch: 5, Loss: 3.826026201248169, Val Acc: 0.8128
Epoch: 6, Loss: 3.8251664638519287, Val Acc: 0.8128
Epoch: 7, Loss: 3.824018716812134, Val Acc: 0.8128
Epoch: 8, Loss: 3.8224806785583496, Val Acc: 0.8128
Epoch: 9, Loss: 3.820396661758423, Val Acc: 0.8128
Epoch: 10, Loss: 3.817521095275879, Val Acc: 0.8128
Epoch: 11, Loss: 3.8134765625, Val Acc: 0.8128
Epoch: 12, Loss: 3.8076674938201904, Val Acc: 0.8128
Epoch: 13, Loss: 3.7991392612457275, Val Acc: 0.8128
Epoch: 14, Loss: 3.786371946334839, Val Acc: 0.8128
Epoch: 15, Loss: 3.76709246635437, Val Acc: 0.8128
Epoch: 16, Loss: 3.7381234169006348, Val Acc: 0.8128
Epoch: 17, Loss: 3.6961469650268555, Val Acc: 0.8128
Epoch: 18, Loss: 3.6395466327667236, Val Acc: 0.8128
Epoch: 19, Loss: 3.571321964263916, Val Acc: 0.8128
Epoch: 20, Loss: 3.

In [12]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Evaluate the model on the testing set
model.eval()
with torch.no_grad():
    logits = model(data.x, data.edge_index)
    pred = logits.argmax(dim=1)
    pred_test = pred[data.test_mask].cpu().numpy()
    labels_test = data.y[data.test_mask].cpu().numpy()

accuracy = accuracy_score(labels_test, pred_test)
precision = precision_score(labels_test, pred_test, average='macro')
recall = recall_score(labels_test, pred_test, average='macro')
f1 = f1_score(labels_test, pred_test, average='macro')

print(f'Test Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1-score: {f1:.4f}')


Test Accuracy: 0.1326
Precision: 0.0221
Recall: 0.1667
F1-score: 0.0390


  _warn_prf(average, modifier, msg_start, len(result))


So, our feature array will have length of 1283 and will be the same for all nodes to avoid jagged matrix

code given below creates feature vectors with the length of 1283 for each node

Obtaining Labels from the .circles files