In [9]:
import torch
import numpy as np
import networkx as nx
from sklearn.preprocessing import LabelEncoder
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
import torch.nn.functional as F
from torch.optim import Adam

# Read datasets
content_path = '/Users/amylei/Desktop/CPSC583/Project/Code/dataset/citeseer-doc-classification/citeseer.content'
cites_path = '/Users/amylei/Desktop/CPSC583/Project/Code/dataset/citeseer-doc-classification/citeseer.cites'

node_features = []
node_labels = []
node_ids = []

with open(content_path, 'r') as f:
    for line in f:
        parts = line.strip().split('\t')
        node_ids.append(parts[0])
        node_features.append(list(map(float, parts[1:-1])))
        node_labels.append(parts[-1])

label_encoder = LabelEncoder()
node_labels = label_encoder.fit_transform(node_labels)

node_features = torch.tensor(node_features, dtype=torch.float)
node_labels = torch.tensor(node_labels, dtype=torch.long)

node_id_map = {node_ids[i]: i for i in range(len(node_ids))}

edges = []

with open(cites_path, 'r') as f:
    for line in f:
        try:
            parts = line.strip().split('\t')
            src_node = node_id_map[parts[0]]
            dst_node = node_id_map[parts[1]]
            edges.append([src_node, dst_node])
        except:
            continue

edges = torch.tensor(edges, dtype=torch.long).t().contiguous()

data = Data(x=node_features, edge_index=edges, y=node_labels)

# GCN model
class GCN(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_channels, 16)
        self.conv2 = GCNConv(16, out_channels)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

# Model Training
model = GCN(in_channels=node_features.shape[1], out_channels=len(np.unique(node_labels)))
optimizer = Adam(model.parameters(), lr=0.01)

epochs = 200
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out, data.y)
    loss.backward()
    optimizer.step()

    if epoch % 50 == 0:
        print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}')

# Model Testing
model.eval()
out = model(data)
preds = out.argmax(dim=1)

correct = preds == data.y
accuracy = correct.sum().item() / correct.size(0)
print(f'Accuracy: {accuracy * 100:.2f}%')


Epoch 1/200, Loss: 1.8013
Epoch 51/200, Loss: 0.1252
Epoch 101/200, Loss: 0.0526
Epoch 151/200, Loss: 0.0280
Accuracy: 99.82%


# GCN

In [35]:
import torch
import numpy as np
import random
from sklearn.preprocessing import LabelEncoder
from torch_geometric.data import Data
import torch.nn.functional as F
from torch.optim import Adam
from torch_geometric.nn import GCNConv
from sklearn.metrics import precision_score, recall_score, f1_score

# Read datasets
content_path = '/Users/amylei/Desktop/CPSC583/Project/Code/dataset/citeseer-doc-classification/citeseer.content'
cites_path = '/Users/amylei/Desktop/CPSC583/Project/Code/dataset/citeseer-doc-classification/citeseer.cites'

node_features = []
node_labels = []
node_ids = []

# Read nodes and features
with open(content_path, 'r') as f:
    for line in f:
        parts = line.strip().split('\t')
        node_ids.append(parts[0])
        node_features.append(list(map(float, parts[1:-1])))
        node_labels.append(parts[-1]) 

label_encoder = LabelEncoder()
node_labels = label_encoder.fit_transform(node_labels)

node_features = torch.tensor(node_features, dtype=torch.float)
node_labels = torch.tensor(node_labels, dtype=torch.long)

node_id_map = {node_ids[i]: i for i in range(len(node_ids))}

edges = []

with open(cites_path, 'r') as f:
    for line in f:
        try:
            parts = line.strip().split('\t')
            src_node = node_id_map[parts[0]]
            dst_node = node_id_map[parts[1]]
            edges.append([src_node, dst_node])
        except:
            continue

edges = torch.tensor(edges, dtype=torch.long).t().contiguous()

data = Data(x=node_features, edge_index=edges, y=node_labels)

num_nodes = node_features.shape[0]
train_size = int(0.8*num_nodes) 
test_size = num_nodes - train_size 

all_indices = list(range(num_nodes))
random.shuffle(all_indices)

train_indices = all_indices[:train_size]
test_indices = all_indices[train_size:]

train_mask = torch.zeros(num_nodes, dtype=torch.bool)
train_mask[train_indices] = 1

test_mask = torch.zeros(num_nodes, dtype=torch.bool)
test_mask[test_indices] = 1

data.train_mask = train_mask
data.test_mask = test_mask

class GCN(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_channels, 16)
        self.conv2 = GCNConv(16, out_channels)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

n_runs = 3
accuracies = []
precisions = []
recalls = []
f1_scores = []

for run in range(n_runs):
    model = GCN(in_channels=node_features.shape[1], out_channels=len(np.unique(node_labels)))
    optimizer = Adam(model.parameters(), lr=0.01)

    epochs = 200
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        out = model(data)
        
        loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()

        if epoch % 50 == 0:
            print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}')

    model.eval()
    out = model(data)

    preds = out.argmax(dim=1)
    correct = preds[data.test_mask] == data.y[data.test_mask]
    accuracy = correct.sum().item() / correct.size(0)
    accuracies.append(accuracy)

    y_true = data.y[data.test_mask].cpu().numpy()
    y_pred = preds[data.test_mask].cpu().numpy()

    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')

    precisions.append(precision)
    recalls.append(recall)
    f1_scores.append(f1)

print("\nEvaluation Results of GCN after {} runs:".format(n_runs))
print(f"Accuracy: {np.mean(accuracies):.4f} ± {np.std(accuracies):.4f}")
print(f"Precision: {np.mean(precisions):.4f} ± {np.std(precisions):.4f}")
print(f"Recall: {np.mean(recalls):.4f} ± {np.std(recalls):.4f}")
print(f"F1 Score: {np.mean(f1_scores):.4f} ± {np.std(f1_scores):.4f}")


Epoch 1/200, Loss: 1.7927
Epoch 51/200, Loss: 0.0965
Epoch 101/200, Loss: 0.0351
Epoch 151/200, Loss: 0.0179
Epoch 1/200, Loss: 1.8135
Epoch 51/200, Loss: 0.0974
Epoch 101/200, Loss: 0.0365
Epoch 151/200, Loss: 0.0190
Epoch 1/200, Loss: 1.7934
Epoch 51/200, Loss: 0.0901
Epoch 101/200, Loss: 0.0323
Epoch 151/200, Loss: 0.0165

Evaluation Results of GCN after 3 runs:
Accuracy: 0.7355 ± 0.0043
Precision: 0.7346 ± 0.0044
Recall: 0.7355 ± 0.0043
F1 Score: 0.7339 ± 0.0043


# RF+GCN

In [40]:
import torch
import numpy as np
import random
from sklearn.preprocessing import LabelEncoder
from torch_geometric.data import Data
from sklearn.ensemble import RandomForestClassifier
import torch.nn.functional as F
from torch.optim import Adam
from torch_geometric.nn import GCNConv
from sklearn.metrics import precision_recall_fscore_support
import os

# Read datasets
content_path = '/Users/amylei/Desktop/CPSC583/Project/Code/dataset/citeseer-doc-classification/citeseer.content'
cites_path = '/Users/amylei/Desktop/CPSC583/Project/Code/dataset/citeseer-doc-classification/citeseer.cites'

node_features = []
node_labels = []
node_ids = []

with open(content_path, 'r') as f:
    for line in f:
        parts = line.strip().split('\t')
        node_ids.append(parts[0])
        node_features.append(list(map(float, parts[1:-1])))
        node_labels.append(parts[-1])

label_encoder = LabelEncoder()
node_labels = label_encoder.fit_transform(node_labels)

node_features = np.array(node_features, dtype=np.float32)
node_labels = np.array(node_labels, dtype=np.int64)

node_id_map = {node_ids[i]: i for i in range(len(node_ids))}

edges = []

with open(cites_path, 'r') as f:
    for line in f:
        try:
            parts = line.strip().split('\t')
            src_node = node_id_map[parts[0]]
            dst_node = node_id_map[parts[1]]
            edges.append([src_node, dst_node])
        except:
            continue

edges = torch.tensor(edges, dtype=torch.long).t().contiguous() 

data = Data(x=torch.tensor(node_features), edge_index=edges, y=torch.tensor(node_labels))

num_nodes = node_features.shape[0]
train_size = int(0.8*num_nodes) 
test_size = num_nodes - train_size 


all_indices = list(range(num_nodes))
random.shuffle(all_indices)

train_indices = all_indices[:train_size]
test_indices = all_indices[train_size:]

train_mask = torch.zeros(num_nodes, dtype=torch.bool)
train_mask[train_indices] = 1

test_mask = torch.zeros(num_nodes, dtype=torch.bool)
test_mask[test_indices] = 1

data.train_mask = train_mask
data.test_mask = test_mask

rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(node_features, node_labels)

feature_importances = rf.feature_importances_
important_features_idx = np.argsort(feature_importances)[::-1][:100]

node_features = node_features[:, important_features_idx]

data.x = torch.tensor(node_features, dtype=torch.float)

processed_data_path = 'processed_data.pt'
torch.save(data, processed_data_path)
print(f"Processed data saved to {processed_data_path}")

data = torch.load(processed_data_path)

class GCN(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_channels, 32) 
        self.conv2 = GCNConv(32, out_channels) 

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        x = self.conv2(x, edge_index)
        return x


model = GCN(in_channels=node_features.shape[1], out_channels=len(np.unique(node_labels)))
optimizer = Adam(model.parameters(), lr=0.01)


runs = 3
accuracies = []
precisions = []
recalls = []
f1_scores = []

for run in range(runs):
    model = GCN(in_channels=node_features.shape[1], out_channels=len(np.unique(node_labels)))
    optimizer = Adam(model.parameters(), lr=0.01)

    epochs = 200
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        out = model(data)
        
        loss = F.cross_entropy(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()

        if epoch % 50 == 0:
            print(f'[GCN] Run {run+1}, Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}')

    model.eval()
    out = model(data)

    preds = out.argmax(dim=1)
    correct = preds[data.test_mask] == data.y[data.test_mask] 
    accuracy = correct.sum().item() / correct.size(0)
    accuracies.append(accuracy)

    true_labels = data.y[data.test_mask].numpy()
    pred_labels = preds[data.test_mask].numpy()

    precision, recall, f1, _ = precision_recall_fscore_support(true_labels, pred_labels, average='weighted')
    precisions.append(precision)
    recalls.append(recall)
    f1_scores.append(f1)

    print(f'[GCN] Run {run+1} - Accuracy: {accuracy * 100:.2f}%, Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}')

mean_accuracy = np.mean(accuracies)
std_accuracy = np.std(accuracies)
mean_precision = np.mean(precisions)
std_precision = np.std(precisions)
mean_recall = np.mean(recalls)
std_recall = np.std(recalls)
mean_f1 = np.mean(f1_scores)
std_f1 = np.std(f1_scores)

print("\nEvaluation Results of GCN after {} runs:".format(n_runs))
print(f'Average Accuracy: {mean_accuracy :.4f} ± {std_accuracy :.4f}')
print(f'Average Precision: {mean_precision:.4f} ± {std_precision:.4f}')
print(f'Average Recall: {mean_recall:.4f} ± {std_recall:.4f}')
print(f'Average F1 Score: {mean_f1:.4f} ± {std_f1:.4f}')


Processed data saved to processed_data.pt
[GCN] Run 1, Epoch 1/200, Loss: 1.9083
[GCN] Run 1, Epoch 51/200, Loss: 0.6572
[GCN] Run 1, Epoch 101/200, Loss: 0.4794


  data = torch.load(processed_data_path)


[GCN] Run 1, Epoch 151/200, Loss: 0.3320
[GCN] Run 1 - Accuracy: 71.95%, Precision: 0.7217, Recall: 0.7195, F1: 0.7183
[GCN] Run 2, Epoch 1/200, Loss: 1.8832
[GCN] Run 2, Epoch 51/200, Loss: 0.6507
[GCN] Run 2, Epoch 101/200, Loss: 0.4862
[GCN] Run 2, Epoch 151/200, Loss: 0.3416
[GCN] Run 2 - Accuracy: 73.76%, Precision: 0.7381, Recall: 0.7376, F1: 0.7368
[GCN] Run 3, Epoch 1/200, Loss: 1.8282
[GCN] Run 3, Epoch 51/200, Loss: 0.6402
[GCN] Run 3, Epoch 101/200, Loss: 0.4648
[GCN] Run 3, Epoch 151/200, Loss: 0.3262
[GCN] Run 3 - Accuracy: 72.55%, Precision: 0.7326, Recall: 0.7255, F1: 0.7276

Evaluation Results of GCN after 3 runs:
Average Accuracy: 0.7275 ± 0.0075
Average Precision: 0.7308 ± 0.0068
Average Recall: 0.7275 ± 0.0075
Average F1 Score: 0.7276 ± 0.0075


# GraphSAGE

In [41]:
import torch
import numpy as np
import random
from sklearn.preprocessing import LabelEncoder
from torch_geometric.data import Data
import torch.nn.functional as F
from torch.optim import Adam
from torch_geometric.nn import SAGEConv 
from sklearn.metrics import precision_score, recall_score, f1_score

content_path = '/Users/amylei/Desktop/CPSC583/Project/Code/dataset/citeseer-doc-classification/citeseer.content'
cites_path = '/Users/amylei/Desktop/CPSC583/Project/Code/dataset/citeseer-doc-classification/citeseer.cites'

node_features = []
node_labels = []
node_ids = []


with open(content_path, 'r') as f:
    for line in f:
        parts = line.strip().split('\t')
        node_ids.append(parts[0])
        node_features.append(list(map(float, parts[1:-1]))) 
        node_labels.append(parts[-1])  

label_encoder = LabelEncoder()
node_labels = label_encoder.fit_transform(node_labels)

node_features = torch.tensor(node_features, dtype=torch.float)
node_labels = torch.tensor(node_labels, dtype=torch.long)

node_id_map = {node_ids[i]: i for i in range(len(node_ids))}

edges = []

with open(cites_path, 'r') as f:
    for line in f:
        try:
            parts = line.strip().split('\t')
            src_node = node_id_map[parts[0]]
            dst_node = node_id_map[parts[1]]
            edges.append([src_node, dst_node])
        except:
            continue

edges = torch.tensor(edges, dtype=torch.long).t().contiguous() 


data = Data(x=node_features, edge_index=edges, y=node_labels)


num_nodes = node_features.shape[0]
train_size = int(0.8*num_nodes) 
test_size = num_nodes - train_size 

all_indices = list(range(num_nodes))
random.shuffle(all_indices)

train_indices = all_indices[:train_size]
test_indices = all_indices[train_size:]

train_mask = torch.zeros(num_nodes, dtype=torch.bool)
train_mask[train_indices] = 1

test_mask = torch.zeros(num_nodes, dtype=torch.bool)
test_mask[test_indices] = 1

data.train_mask = train_mask
data.test_mask = test_mask

class GraphSAGE(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(GraphSAGE, self).__init__()
        self.conv1 = SAGEConv(in_channels, 16) 
        self.conv2 = SAGEConv(16, out_channels) 

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index)) 
        x = self.conv2(x, edge_index) 
        return F.log_softmax(x, dim=1) 

n_runs = 3
accuracies = []
precisions = []
recalls = []
f1_scores = []

for run in range(n_runs):
    model = GraphSAGE(in_channels=node_features.shape[1], out_channels=len(np.unique(node_labels)))
    optimizer = Adam(model.parameters(), lr=0.01)

    epochs = 200
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        out = model(data)
        
        loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()

        if epoch % 50 == 0:
            print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}')

    model.eval()
    out = model(data)

    preds = out.argmax(dim=1)
    correct = preds[data.test_mask] == data.y[data.test_mask]
    accuracy = correct.sum().item() / correct.size(0)
    accuracies.append(accuracy)

    y_true = data.y[data.test_mask].cpu().numpy()
    y_pred = preds[data.test_mask].cpu().numpy()

    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')

    precisions.append(precision)
    recalls.append(recall)
    f1_scores.append(f1)

print("\nEvaluation Results for GraphSAGE after {} runs:".format(n_runs))
print(f"Accuracy: {np.mean(accuracies):.4f} ± {np.std(accuracies):.4f}")
print(f"Precision: {np.mean(precisions):.4f} ± {np.std(precisions):.4f}")
print(f"Recall: {np.mean(recalls):.4f} ± {np.std(recalls):.4f}")
print(f"F1 Score: {np.mean(f1_scores):.4f} ± {np.std(f1_scores):.4f}")


Epoch 1/200, Loss: 1.7842
Epoch 51/200, Loss: 0.0027
Epoch 101/200, Loss: 0.0010
Epoch 151/200, Loss: 0.0006
Epoch 1/200, Loss: 1.7815
Epoch 51/200, Loss: 0.0026
Epoch 101/200, Loss: 0.0009
Epoch 151/200, Loss: 0.0006
Epoch 1/200, Loss: 1.8016
Epoch 51/200, Loss: 0.0021
Epoch 101/200, Loss: 0.0008
Epoch 151/200, Loss: 0.0005

Evaluation Results for GraphSAGE after 3 runs:
Accuracy: 0.7260 ± 0.0031
Precision: 0.7266 ± 0.0024
Recall: 0.7260 ± 0.0031
F1 Score: 0.7245 ± 0.0026


# RF+GraphSAGE

In [31]:
import torch
import numpy as np
import random
from sklearn.preprocessing import LabelEncoder
from torch_geometric.data import Data
from sklearn.ensemble import RandomForestClassifier
import torch.nn.functional as F
from torch.optim import Adam
from torch_geometric.nn import SAGEConv
from sklearn.metrics import precision_score, recall_score, f1_score
import os

# Read datasets
content_path = '/Users/amylei/Desktop/CPSC583/Project/Code/dataset/citeseer-doc-classification/citeseer.content'
cites_path = '/Users/amylei/Desktop/CPSC583/Project/Code/dataset/citeseer-doc-classification/citeseer.cites'

node_features = []
node_labels = []
node_ids = []

with open(content_path, 'r') as f:
    for line in f:
        parts = line.strip().split('\t')
        node_ids.append(parts[0])
        node_features.append(list(map(float, parts[1:-1])))
        node_labels.append(parts[-1])

label_encoder = LabelEncoder()
node_labels = label_encoder.fit_transform(node_labels)

node_features = np.array(node_features, dtype=np.float32)
node_labels = np.array(node_labels, dtype=np.int64)

node_id_map = {node_ids[i]: i for i in range(len(node_ids))}

edges = []

with open(cites_path, 'r') as f:
    for line in f:
        try:
            parts = line.strip().split('\t')
            src_node = node_id_map[parts[0]]
            dst_node = node_id_map[parts[1]]
            edges.append([src_node, dst_node])
        except:
            continue

edges = torch.tensor(edges, dtype=torch.long).t().contiguous()

data = Data(x=torch.tensor(node_features), edge_index=edges, y=torch.tensor(node_labels))

num_nodes = node_features.shape[0]
train_size = int(0.8*num_nodes)
test_size = num_nodes - train_size

all_indices = list(range(num_nodes))
random.shuffle(all_indices)

train_indices = all_indices[:train_size]
test_indices = all_indices[train_size:]

train_mask = torch.zeros(num_nodes, dtype=torch.bool)
train_mask[train_indices] = 1

test_mask = torch.zeros(num_nodes, dtype=torch.bool)
test_mask[test_indices] = 1

data.train_mask = train_mask
data.test_mask = test_mask

rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(node_features, node_labels)

feature_importances = rf.feature_importances_
important_features_idx = np.argsort(feature_importances)[::-1][:50]

node_features = node_features[:, important_features_idx] 

data.x = torch.tensor(node_features, dtype=torch.float)

processed_data_path = 'processed_data.pt'
torch.save(data, processed_data_path)
print(f"Processed data saved to {processed_data_path}")


data = torch.load(processed_data_path)

class GraphSAGE(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(GraphSAGE, self).__init__()
        self.conv1 = SAGEConv(in_channels, 32) 
        self.conv2 = SAGEConv(32, out_channels) 

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index)) 
        x = self.conv2(x, edge_index) 
        return x 


def train_and_evaluate(model, data, epochs=200):
    optimizer = Adam(model.parameters(), lr=0.01)
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        out = model(data)
        
        loss = F.cross_entropy(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()

        if epoch % 50 == 0:
            print(f'[GraphSAGE] Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}')

    model.eval()
    out = model(data)

    preds = out.argmax(dim=1)
    correct = preds[data.test_mask] == data.y[data.test_mask]

    accuracy = correct.sum().item() / correct.size(0)
    precision = precision_score(data.y[data.test_mask].cpu(), preds[data.test_mask].cpu(), average='macro', zero_division=1)
    recall = recall_score(data.y[data.test_mask].cpu(), preds[data.test_mask].cpu(), average='macro', zero_division=1)
    f1 = f1_score(data.y[data.test_mask].cpu(), preds[data.test_mask].cpu(), average='macro', zero_division=1)

    return accuracy, precision, recall, f1

num_runs = 3
accuracies, precisions, recalls, f1_scores = [], [], [], []

for _ in range(num_runs):
    model = GraphSAGE(in_channels=node_features.shape[1], out_channels=len(np.unique(node_labels)))
    accuracy, precision, recall, f1 = train_and_evaluate(model, data)
    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    f1_scores.append(f1)

accuracy_mean = np.mean(accuracies)
accuracy_std = np.std(accuracies)

precision_mean = np.mean(precisions)
precision_std = np.std(precisions)

recall_mean = np.mean(recalls)
recall_std = np.std(recalls)

f1_mean = np.mean(f1_scores)
f1_std = np.std(f1_scores)

print("\nEvaluation Results of GraphSAGE+RF after {} runs:".format(n_runs))
print(f'Accuracy: {accuracy_mean:.4f} ± {accuracy_std:.4f}')
print(f'Precision: {precision_mean:.4f} ± {precision_std:.4f}')
print(f'Recall: {recall_mean:.4f} ± {recall_std:.4f}')
print(f'F1: {f1_mean:.4f} ± {f1_std:.4f}')


Processed data saved to processed_data.pt
[GraphSAGE] Epoch 1/200, Loss: 1.8172
[GraphSAGE] Epoch 51/200, Loss: 0.6703
[GraphSAGE] Epoch 101/200, Loss: 0.5164


  data = torch.load(processed_data_path)


[GraphSAGE] Epoch 151/200, Loss: 0.3709
[GraphSAGE] Epoch 1/200, Loss: 1.8066
[GraphSAGE] Epoch 51/200, Loss: 0.6662
[GraphSAGE] Epoch 101/200, Loss: 0.5052
[GraphSAGE] Epoch 151/200, Loss: 0.3549
[GraphSAGE] Epoch 1/200, Loss: 1.8050
[GraphSAGE] Epoch 51/200, Loss: 0.6675
[GraphSAGE] Epoch 101/200, Loss: 0.5067
[GraphSAGE] Epoch 151/200, Loss: 0.3532

Evaluation Results of GraphSAGE+RF after 3 runs:
Accuracy: 0.6918 ± 0.0105
Precision: 0.6585 ± 0.0112
Recall: 0.6578 ± 0.0099
F1: 0.6575 ± 0.0105
