In [None]:
import json
import networkx as nx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
from torch_geometric.loader import DataLoader
from torch_geometric.data import Data
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from torch_geometric.utils.convert import from_networkx
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool
from sklearn.preprocessing import StandardScaler
import joblib

In [57]:
class GCN(nn.Module):
    """Two-layer graph convolutional network with a graph-level linear head.

    Node features go through two ``GCNConv`` layers, each followed by a ReLU,
    are averaged per graph with ``global_mean_pool``, and are finally mapped
    to ``output_size`` values per graph by a linear layer.

    Args:
        input_size: Input width of the first ``GCNConv`` layer.
        hidden_size: Output width of both ``GCNConv`` layers and input width
            of the linear head.
        output_size: Number of values produced per graph.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Attribute names are part of the checkpoint's state-dict keys; keep them stable.
        self.conv1 = GCNConv(input_size, hidden_size)
        self.conv2 = GCNConv(hidden_size, hidden_size)
        self.lin = nn.Linear(hidden_size, output_size)

    def forward(self, x, edge_index, batch):
        """Return one ``output_size``-wide row per graph in the batch.

        Args:
            x: Node feature tensor.
            edge_index: Edge index tensor passed to both convolutions.
            batch: Node-to-graph assignment used for mean pooling.
        """
        hidden = x
        for conv in (self.conv1, self.conv2):
            hidden = F.relu(conv(hidden, edge_index))
        pooled = global_mean_pool(hidden, batch)
        return self.lin(pooled)

In [None]:
# Load the model
# These sizes are passed straight to the GCN constructor and therefore have to
# match the architecture stored in the checkpoint, otherwise the strict
# state-dict load below raises.
input_size = 7
hidden_size = 512
output_size = 7

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = GCN(input_size, hidden_size, output_size)
# A state dict only contains tensors, so restrict unpickling with
# weights_only=True instead of running the full pickle machinery on the file.
model.load_state_dict(
    torch.load("./models/gcn_nabil.pth", map_location=device, weights_only=True),
    strict=True,
)
model = model.to(device)
# Inference mode for layers such as dropout/batch-norm; also displays the model summary.
model.eval()


In [None]:
# Load the serialized graph datasets and wrap them in loaders.
# NOTE(review): weights_only=False runs the full unpickler, which can execute
# arbitrary code -- only load graph files that come from a trusted source.
# It is presumably required because the files hold pickled graph objects
# rather than plain tensors; confirm against the code that wrote them.
graphs = torch.load("./graphs/cb_nabil.pt", weights_only=False)
# Fixed random_state keeps the train/validation split reproducible across runs.
train, val = train_test_split(graphs, test_size=0.2, random_state=12)
train_graphs = DataLoader(train, batch_size=32, shuffle=True)
val_graphs = DataLoader(val, batch_size=32, shuffle=False)
# Pass weights_only=False here as well: newer PyTorch releases default to
# weights_only=True, which would reject this file while accepting the one above.
test_load = torch.load("./graphs/cb_nabil_test.pt", weights_only=False)
test_graphs = DataLoader(test_load, batch_size=32, shuffle=False)

In [61]:
# Function to generate predictions
def generate_predictions(graphs, target_scaler=None, target_columns=None):
    """Run the notebook-level ``model`` over a loader and tabulate the results.

    Parameters
    ----------
    graphs : iterable
        Yields batches that support ``.to(device)`` and expose ``x``,
        ``edge_index``, ``batch`` and ``y``.
    target_scaler : object, optional
        Object with an ``inverse_transform`` method used to map model outputs
        and targets back to the original scale. Defaults to the notebook-level
        ``scaler``.
    target_columns : sequence, optional
        Column labels for the returned frames. Defaults to the notebook-level
        ``df.columns``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(predictions_df, ground_truth_df)``, both inverse-transformed, with
        an extra ``published_date`` column holding the running row number.

    Raises
    ------
    ValueError
        If ``graphs`` yields no batches.
    NameError
        If a default is needed but ``scaler`` / ``df`` is not defined in the
        notebook.
    """
    # Resolve the notebook-level fallbacks first so a missing `scaler`/`df`
    # fails immediately instead of after the whole inference pass.
    if target_scaler is None:
        target_scaler = scaler
    if target_columns is None:
        target_columns = df.columns

    predictions = []
    ground_truth = []
    # Gradients are never needed here; disable them once for the whole pass.
    with torch.no_grad():
        for batch in graphs:
            batch = batch.to(device)
            output = model(batch.x, batch.edge_index, batch.batch)
            predictions.append(output.view(-1).cpu().numpy())
            ground_truth.append(batch.y.cpu().numpy())

    if not predictions:
        # np.concatenate would raise an opaque "need at least one array" error.
        raise ValueError("generate_predictions received no batches to predict on")

    predictions = np.concatenate(predictions, axis=0)
    ground_truth = np.concatenate(ground_truth, axis=0)

    # Reshape to one row per graph with `output_size` values each
    predictions = predictions.reshape(-1, output_size)
    ground_truth = ground_truth.reshape(-1, output_size)

    # Inverse transform to the original scale
    predictions_original = target_scaler.inverse_transform(predictions)
    ground_truth_original = target_scaler.inverse_transform(ground_truth)

    # Convert to DataFrames
    predictions_df = pd.DataFrame(predictions_original, columns=target_columns)
    ground_truth_df = pd.DataFrame(ground_truth_original, columns=target_columns)

    # Add a 'published_date' column for plotting.
    # NOTE: despite its name this is only the running row number, not a date.
    predictions_df['published_date'] = np.arange(len(predictions_df))
    ground_truth_df['published_date'] = np.arange(len(ground_truth_df))

    return predictions_df, ground_truth_df


In [62]:
# Run inference on the validation and test loaders; each call returns the
# predictions frame followed by the matching ground-truth frame.
val_predictions_df, val_ground_truth_df = generate_predictions(graphs=val_graphs)
test_predictions_df, test_ground_truth_df = generate_predictions(graphs=test_graphs)


In [None]:
# Preview the first rows of the inverse-transformed test targets.
test_ground_truth_df.head(n=5)

In [None]:
# Preview the first rows of the inverse-transformed test predictions.
test_predictions_df.head(n=5)

In [None]:
# Plot test set
# Take the columns from the frame being plotted instead of relying on a
# notebook-level `df`; 'published_date' is the x-axis helper column, so skip it.
target_cols = [c for c in test_ground_truth_df.columns if c != 'published_date']

for col in target_cols:
    # One figure per column, with ground truth and predictions overlaid.
    fig, ax = plt.subplots(figsize=(20, 10))
    sns.lineplot(data=test_ground_truth_df, x='published_date', y=col, color="blue", label="Test Ground Truth", ax=ax)
    sns.lineplot(data=test_predictions_df, x='published_date', y=col, color="orange", label="Test Predictions", ax=ax)
    ax.set_title(f"Test Set: {col}")
    # The x values are running row numbers (see 'published_date' construction).
    ax.set_xlabel('Published Date')
    ax.set_ylabel('Value')
    ax.legend()
    plt.show()
    # Release the figure so a long column list does not accumulate open figures.
    plt.close(fig)