An alternate PyTorch Geometric script to the DGL function

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch_geometric.transforms as T
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split

In [None]:
# Author table; its 'Author' column is used below to number the graph nodes.
authors_df = pd.read_csv(filepath_or_buffer="../data/author_id.csv")
# Co-authorship pairs; the 'Author1' / 'Author2' columns become the graph edges.
edges_df = pd.read_csv(filepath_or_buffer="../data/co_author_relation.csv")
# Per-author feature table; every column after the first is used as a node feature.
features_df = pd.read_csv(filepath_or_buffer="../data/author_coauthor_features.csv")

In [None]:
# Number the authors by their row position in authors_df; these integers are
# the node indices used for the graph's edges.
author_id_to_number = {author_id: idx for idx, author_id in enumerate(authors_df['Author'])}
edges_df['Author_id_num_1'] = edges_df['Author1'].map(author_id_to_number)
edges_df['Author_id_num_2'] = edges_df['Author2'].map(author_id_to_number)

# Series.map() yields NaN for any ID missing from the lookup. Left alone, those
# NaNs turn the columns into floats and become meaningless node indices once
# cast to an integer tensor, so fail early with a clear message instead.
unmapped_rows = edges_df[['Author_id_num_1', 'Author_id_num_2']].isna().any(axis=1)
if unmapped_rows.any():
    raise ValueError(
        f"{int(unmapped_rows.sum())} co-author edge(s) reference an author ID "
        "that is not present in authors_df['Author']"
    )

# Keep only the numeric endpoints (plus any other columns); the raw ID columns
# are no longer needed. drop() returns a new frame, so edges_df is untouched.
columns = ["Author1", "Author2"]
edges_df_copy = edges_df.drop(columns=columns)
edges_df_copy.head()

In [None]:
# Graph dimensions taken straight from the loaded tables.
num_nodes = authors_df.shape[0]
num_features = features_df.shape[1] - 1  # Subtract 1 for the 'Author' column
num_edges = edges_df.shape[0]

edge_columns = ['Author_id_num_1', 'Author_id_num_2']

# Set of (source, target) node-index pairs, used for O(1) membership tests below.
co_author_mapping = set(zip(edges_df_copy[edge_columns[0]], edges_df_copy[edge_columns[1]]))

# Edge list as a 2 x num_edges long tensor (one column per edge).
edge_index = torch.tensor(edges_df_copy[edge_columns].values, dtype=torch.long).t().contiguous()

# Node feature matrix: every column of features_df except the first.
x = torch.tensor(features_df.iloc[:, 1:].values, dtype=torch.float32)


def _is_co_author_pair(src, dst):
    """Return True when the pair appears in co_author_mapping in either direction."""
    return (src, dst) in co_author_mapping or (dst, src) in co_author_mapping


# One label per edge in edge_index: 1 when the pair is a known co-author pair, else 0.
y = torch.tensor(
    [int(_is_co_author_pair(src, dst)) for src, dst in edge_index.t().tolist()],
    dtype=torch.float32,
)

In [None]:


# Bundle node features, connectivity and the per-edge labels into one graph object.
data = Data(x=x, edge_index=edge_index, y=y)
# A transform is a callable that takes a Data object and returns the
# transformed one. Data exposes no `transform` attribute, so a
# hasattr(data, 'transform') guard is always false and would skip the
# normalization entirely.
data = T.NormalizeFeatures()(data)
data.num_features

In [None]:
x.shape  # notebook display: (rows, columns) of the node-feature tensor built from features_df

In [None]:
class LinkPredictionModel(nn.Module):
    """Two graph-convolution layers followed by a linear projection.

    The network maps the input feature matrix through ``GCNConv`` layers of
    width 128 and 64 (each followed by ReLU) and then through a linear layer
    that projects back to ``num_features`` columns. The output has one row
    per row of the input ``x`` (i.e. per node), not per edge.

    Args:
        num_nodes: Accepted for the caller's convenience; not used by the model.
        num_features: Width of the input features and of the final output.
    """

    def __init__(self, num_nodes, num_features):
        super().__init__()
        hidden_dim, embed_dim = 128, 64
        self.conv1 = GCNConv(num_features, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, embed_dim)
        # Final projection back to the input feature width.
        self.fc = nn.Linear(embed_dim, num_features)

    def forward(self, x, edge_index):
        """Return the projected output for every row of ``x``.

        Args:
            x: Input feature matrix whose last dimension is ``num_features``.
            edge_index: Graph connectivity passed to both convolution layers.

        Returns:
            Tensor with the same number of rows as ``x`` and ``num_features`` columns.
        """
        hidden = torch.relu(self.conv1(x, edge_index))
        hidden = torch.relu(self.conv2(hidden, edge_index))
        return self.fc(hidden)

# Build the network, then the Adam optimizer over its parameters (learning
# rate 0.01), and a binary cross-entropy loss that expects raw logits.
model = LinkPredictionModel(num_nodes=num_nodes, num_features=num_features)
optimizer = optim.Adam(params=model.parameters(), lr=0.01)
criterion = nn.BCEWithLogitsLoss()

In [None]:
def train(epoch):
    """Run one full-graph optimisation step for link prediction.

    The model produces one output vector per node. An edge is scored by the
    dot product of its two endpoint vectors, and those scores are trained with
    the module-level ``criterion`` against ``data.y`` for the observed edges
    and against 0 for an equal number of randomly sampled non-edges. Without
    sampled negatives every label would be positive and the loss would carry
    no signal about which pairs are *not* linked.

    If ``data`` carries a ``train_mask`` it is used to select the supervised
    positive edges; otherwise all edges in ``data.edge_index`` are used.

    Args:
        epoch: Current epoch number. Accepted for the caller's convenience and
            not used inside the step.

    Returns:
        The training loss of this step as a Python float.
    """
    # Local import keeps this function self-contained; torch_geometric is
    # already a dependency of the file.
    from torch_geometric.utils import negative_sampling

    model.train()
    optimizer.zero_grad()

    # Message passing always runs over the full graph.
    node_out = model(data.x, data.edge_index)

    pos_edges = data.edge_index
    pos_labels = data.y
    # NOTE(review): assumes train_mask, when present, is an edge-level mask
    # aligned with data.y / the columns of data.edge_index -- confirm.
    edge_mask = getattr(data, 'train_mask', None)
    if edge_mask is not None:
        pos_edges = pos_edges[:, edge_mask]
        pos_labels = pos_labels[edge_mask]

    # Sample node pairs that are not connected in the graph. The sampler may
    # return fewer pairs than requested, so sizes are read from its result.
    neg_edges = negative_sampling(
        data.edge_index,
        num_nodes=data.num_nodes,
        num_neg_samples=pos_edges.size(1),
    )
    neg_labels = torch.zeros(
        neg_edges.size(1), dtype=pos_labels.dtype, device=pos_labels.device
    )

    edges = torch.cat([pos_edges, neg_edges], dim=1)
    labels = torch.cat([pos_labels, neg_labels])
    # One logit per candidate edge, so logits and labels have matching shapes.
    logits = (node_out[edges[0]] * node_out[edges[1]]).sum(dim=-1)

    loss = criterion(logits, labels)
    loss.backward()
    optimizer.step()
    return loss.item()


In [None]:
# Run 200 training steps (epochs numbered 1 through 200) behind a progress bar.
epoch_numbers = range(1, 201)
for epoch in tqdm(epoch_numbers):
    train(epoch)

In [None]:
target_author_index = 100

# Inference and predicting potential co-authors.
# The graph convolutions need the whole feature matrix together with the
# edge list: edge_index refers to node positions in data.x, so feeding a
# single row would leave almost every index out of range. The inputs also
# stay in their stored dtype so they match the model's parameters.
model.eval()
with torch.no_grad():  # no gradients are needed for ranking
    node_out = model(data.x, data.edge_index)
    # One score per node: dot product between that node's output vector and
    # the target author's output vector.
    predicted_scores = node_out @ node_out[target_author_index]

# Find top 5 potential co-authors (highest score first, skipping the target itself)
sorted_indices = torch.argsort(predicted_scores, descending=True)
top_indices = [idx for idx in sorted_indices.tolist() if idx != target_author_index][:5]

print("Top 5 Potential Co-Authors:")
for rank, index in enumerate(top_indices, start=1):
    print(f"{rank}. Author index: {index}")