<a href="https://colab.research.google.com/github/yueguo1997/SNA_recommender_system/blob/main/SNA_code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount('/content/drive')
import os
cur_path = "/content/drive/My Drive/Colab Notebooks/"
os.chdir(cur_path)
!pwd

Mounted at /content/drive
/content/drive/My Drive/Colab Notebooks


In [None]:
import pandas as pd

# Load the buyer/item/seller interaction file: one '::'-delimited record per
# line, no header row, columns named here.
# engine='python' is requested explicitly because a multi-character separator
# is not supported by the default C parser; without it pandas falls back to
# the Python engine anyway and emits a ParserWarning on every run.
# NOTE(review): the 'r2' values in the preview printed by this cell are ~1.5e9,
# which looks like Unix timestamps rather than ratings - confirm what this
# column actually holds before using it as a rating target.
df = pd.read_csv(
    'bonanza_buyer_item_seller_id_R_2.txt',
    sep='::',
    header=None,
    names=['buyer', 'item', 'seller', 'r2'],
    engine='python',
)

# Printing out the head of the dataset
print(df.head())

  df = pd.read_csv('bonanza_buyer_item_seller_id_R_2.txt', sep='::', header=None, names=['buyer', 'item', 'seller', 'r2'])


   buyer  item  seller            r2
0      0     0       0  1.548565e+09
1      0     0       1  1.548565e+09
2      1     0       2  1.539144e+09
3      2     0       3  1.528690e+09
4      3     0       3  1.547701e+09


In [None]:
import networkx as nx
import csv
G = nx.MultiGraph()

# Buyer, item and seller ids are numbered independently in the raw file (the
# preview shows buyer 0, item 0 and seller 0 on the first row). Using the raw
# ids directly as node keys would merge those three entities into one node and
# overwrite its 'type' attribute. Each entity type therefore gets its own
# disjoint integer range:
#   buyers  -> [0, item_offset)
#   items   -> [item_offset, seller_offset)
#   sellers -> [seller_offset, ...)
# Node keys stay plain integers; the original id is kept in the 'raw_id'
# node attribute.
# Assumes the ids are non-negative integers and df is non-empty - TODO confirm.
item_offset = int(df['buyer'].max()) + 1
seller_offset = item_offset + int(df['item'].max()) + 1
NODE_OFFSETS = {'buyer': 0, 'item': item_offset, 'seller': seller_offset}

# Add nodes for buyers, items, and sellers
for node_type, offset in NODE_OFFSETS.items():
    for raw_id in df[node_type].unique().tolist():
        G.add_node(offset + raw_id, type=node_type, raw_id=raw_id)

# Add edges for interactions: one 'buys' and one 'sold_by' edge per record.
# Iterating over the id columns directly (instead of df.iterrows()) keeps the
# ids as Python ints - iterrows() upcasts every row to float because of the
# float 'r2' column - and avoids building a Series per row.
for buyer, item, seller in zip(df['buyer'].tolist(),
                               df['item'].tolist(),
                               df['seller'].tolist()):
    item_node = item_offset + item
    G.add_edge(buyer, item_node, type='buys')
    G.add_edge(item_node, seller_offset + seller, type='sold_by')

In [None]:
# Summarise the graph: node count, edge count, and how many distinct values
# the 'type' edge attribute takes.
distinct_edge_types = set(nx.get_edge_attributes(G, 'type').values())
for label, value in (
    ("Number of nodes:", G.number_of_nodes()),
    ("Number of edges:", G.number_of_edges()),
    ("Number of unique edge types:", len(distinct_edge_types)),
):
    print(label, value)

Number of nodes: 8922
Number of edges: 78200
Number of unique edge types: 2


In [None]:
# Average number of edges per node, computed from the graph itself rather than
# from counts copied out of an earlier cell's output, so the figure cannot go
# stale when the data or the graph construction changes.
G.number_of_edges() / G.number_of_nodes()

8.76485093028469

In [None]:
import torch
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree

# Define the Graph Convolutional Network (GCN) layer
class GCNConv(MessagePassing):
    """Graph-convolution layer built on ``MessagePassing`` with 'add' aggregation.

    ``forward`` adds self-loops to ``edge_index``, applies one linear map to
    the node features, and propagates messages in which each edge's feature
    vector is multiplied by ``deg[row] ** -0.5 * deg[col] ** -0.5``, where
    ``deg`` is computed from the second row of ``edge_index``.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__(aggr='add')
        self.lin = torch.nn.Linear(in_channels, out_channels)

    def forward(self, x, edge_index):
        node_count = x.size(0)

        # Self-loops first, so the degree below includes them.
        edge_index, _ = add_self_loops(edge_index, num_nodes=node_count)

        # Project the node features.
        x = self.lin(x)

        # Per-edge normalisation coefficient from the inverse square-root degrees.
        row_ids, col_ids = edge_index
        inv_sqrt_deg = degree(col_ids, node_count, dtype=x.dtype).pow(-0.5)
        edge_norm = inv_sqrt_deg[row_ids] * inv_sqrt_deg[col_ids]

        return self.propagate(edge_index, x=x, norm=edge_norm)

    def message(self, x_j, norm):
        # Scale each neighbour feature vector by its edge coefficient.
        return x_j * norm.view(-1, 1)

# Convert the NetworkX graph to PyTorch Geometric format
from torch_geometric.data import Data

# NOTE(review): x and y are hard-coded 3-row placeholders, while edge_index
# below is built from every edge of G (the statistics cell reports 8922
# nodes). The feature matrix presumably needs one row per node - confirm and
# replace the placeholders with real features/targets.
x = torch.tensor([[1], [2], [3]])  # Feature matrix
y = torch.tensor([0, 1, 0])  # Target labels
# NOTE(review): G is a MultiGraph, so the same (u, v) pair may appear more than
# once in this list; the dict built from `edges` at the end of this cell keeps
# only one entry per distinct pair.
edges = [(u, v) for u, v in G.edges()]  # Edge list
edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()  # Transpose edge list
data = Data(x=x, edge_index=edge_index, y=y)

# Create a GCN model and train it on the data
# NOTE(review): this import rebinds the name GCNConv, so the GCNConv class
# defined earlier in this cell is not the one instantiated below.
# NOTE(review): ReLU is normally imported from torch.nn, and
# torch_geometric.nn.Sequential is documented as taking an input-argument
# string plus a list of modules - verify that this import and the
# Sequential(...) call below work with the installed version.
from torch_geometric.nn import GCNConv, Sequential, ReLU
from torch.nn import Linear  # NOTE(review): Linear is not used in this cell.

model = Sequential(
    GCNConv(1, 16),
    ReLU(),
    GCNConv(16, 1)
)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.MSELoss()

# A single optimisation step is performed here (there is no epoch loop).
model.train()
optimizer.zero_grad()
out = model(data.x.float(), data.edge_index)
# NOTE(review): `data` was created with only x, edge_index and y, so no
# train_mask is set anywhere in this cell - define one (or drop the mask)
# before this line. y is also an integer tensor while the model output is
# float - confirm the intended target dtype and shape for MSELoss.
loss = criterion(out[data.train_mask], data.y[data.train_mask])
loss.backward()
optimizer.step()

# Predict the ratings for all edges in the graph
# NOTE(review): the model is called on node features, so `ratings` presumably
# holds one value per node rather than per edge - confirm before pairing it
# with `edges` below.
model.eval()
with torch.no_grad():
    ratings = model(data.x.float(), data.edge_index).detach().numpy().squeeze()

# Add the predicted ratings as an edge attribute to the NetworkX graph
# NOTE(review): the dict keys here are (u, v) pairs; for a MultiGraph networkx
# presumably expects (u, v, key) triples - verify against the
# nx.set_edge_attributes documentation.
nx.set_edge_attributes(G, values=dict(zip(edges, ratings)), name='rating')


In [None]:
import pandas as pd
import numpy as np

# Load the predicted ratings as a Pandas DataFrame
# NOTE(review): G.edges() also returns the item-seller 'sold_by' edges added
# when the graph was built, so labelling the two endpoints 'buyer' and 'item'
# is not valid for every row. The three column lists must also have equal
# length, which requires every edge to carry a 'rating' attribute - confirm.
# The true 'r2' values from df are not carried into this frame.
edges = list(G.edges())
ratings = nx.get_edge_attributes(G, 'rating')
ratings_df = pd.DataFrame({'buyer': [e[0] for e in edges], 'item': [e[1] for e in edges], 'predicted_rating': list(ratings.values())})

# Split the ratings data into training and testing sets
# (80% train / 20% test, with a fixed random_state so the split is repeatable)
from sklearn.model_selection import train_test_split

train_ratings, test_ratings = train_test_split(ratings_df, test_size=0.2, random_state=42)

# Convert the ratings data to PyTorch Geometric format
# x_* hold the (buyer, item) id pairs transposed to shape (2, n_rows);
# y_* hold the 'predicted_rating' column as float tensors.
# NOTE(review): the Data objects below are created without an edge_index,
# although the training code later in this cell reads data_train.edge_index
# and data_test.edge_index - presumably the id pairs were meant to be the
# edge_index rather than x; confirm.
x_train = torch.tensor(train_ratings[['buyer', 'item']].values, dtype=torch.long).t().contiguous()
y_train = torch.tensor(train_ratings['predicted_rating'].values, dtype=torch.float)
x_test = torch.tensor(test_ratings[['buyer', 'item']].values, dtype=torch.long).t().contiguous()
y_test = torch.tensor(test_ratings['predicted_rating'].values, dtype=torch.float)
data_train = Data(x=x_train, y=y_train)
data_test = Data(x=x_test, y=y_test)

# Define a Graph Neural Network (GNN) for rating prediction
from torch_geometric.nn import GATConv, global_mean_pool
from torch.nn import Linear, ReLU

class GNN(torch.nn.Module):
    """Two ``GATConv`` layers, a mean pool over all rows, and a two-layer linear head.

    The first convolution is sized for ``num_users + num_items`` input
    channels; the head ends in a single output unit.

    NOTE(review): ``forward`` concatenates ``x`` with its own single-group mean
    pool along dim 1, and pools again before the linear head - confirm the
    shapes line up for the inputs used in this notebook and that a pooled
    (rather than per-row) output is what the training loop expects.
    """

    def __init__(self, num_users, num_items, hidden_channels):
        super().__init__()
        input_width = num_users + num_items
        self.conv1 = GATConv(input_width, hidden_channels)
        self.conv2 = GATConv(hidden_channels, hidden_channels)
        self.lin1 = Linear(hidden_channels, hidden_channels)
        self.lin2 = Linear(hidden_channels, 1)

    def forward(self, x, edge_index):
        def single_group(t):
            # All-zero assignment vector: every row goes into pooling group 0.
            return torch.zeros(t.shape[0], dtype=torch.long, device=t.device)

        pooled_input = global_mean_pool(x, single_group(x))
        h = torch.cat([x, pooled_input], dim=1)
        h = torch.relu(self.conv1(h, edge_index))
        h = torch.relu(self.conv2(h, edge_index))
        h = global_mean_pool(h, single_group(h))
        h = torch.relu(self.lin1(h))
        return self.lin2(h)

# Train the GNN on the ratings data
# NOTE(review): `buyers` and `items` are not assigned in any cell visible in
# this notebook, so this line presumably raises NameError on a fresh kernel -
# confirm where they were meant to come from (e.g. the unique ids in df).
model = GNN(num_users=len(buyers), num_items=len(items), hidden_channels=64)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.MSELoss()

# 100 full-batch optimisation steps on the training split.
# NOTE(review): data_train was built with only x and y, so
# data_train.edge_index has not been set; also confirm that the shape of `out`
# matches data_train.y before relying on this loss.
model.train()
for epoch in range(100):
    optimizer.zero_grad()
    out = model(data_train.x.long(), data_train.edge_index)
    loss = criterion(out, data_train.y)
    loss.backward()
    optimizer.step()

# Generate recommendations for a specific buyer
buyer_id = 1  # ID of the buyer to generate recommendations for
# NOTE(review): here `buyers` is indexed like a DataFrame with 'buyer' and
# 'item' columns, whereas above only its length is used - confirm its type.
buyer_items = set(buyers[buyers['buyer'] == buyer_id]['item'])  # Items the buyer has already purchased

# Create a mask for the test set that only includes items the buyer has not purchased
test_mask = np.logical_not(test_ratings['item'].isin(buyer_items)).values

# Use the trained GNN to predict ratings for the test set
# NOTE(review): test_ratings comes straight from train_test_split, so this
# in-place .loc assignment may trigger pandas' SettingWithCopyWarning;
# indexing test_preds with test_mask also requires one prediction per test
# row - confirm the model output shape.
model.eval()
with torch.no_grad():
    test_preds = model(data_test.x.long(), data_test.edge_index).detach().numpy().squeeze()
    test_ratings.loc[test_mask, 'predicted_rating'] = test_preds[test_mask]

# Generate a list of recommended items for the buyer, highest predicted rating first
# NOTE(review): the rows are filtered by buyer only, so items the buyer has
# already purchased are not excluded from this list.
buyer_test_ratings = test_ratings[test_ratings['buyer'] == buyer_id]
buyer_test_ratings = buyer_test_ratings.sort_values(by='predicted_rating', ascending=False)
recommended_items = list(buyer_test_ratings['item'].values)

print("Recommended items for buyer {}: {}".format(buyer_id, recommended_items))

# Evaluate the performance of the recommendation system
from sklearn.metrics import mean_squared_error

# NOTE(review): test_ratings is derived from ratings_df, which is created with
# the columns 'buyer', 'item' and 'predicted_rating' only - there is no 'r2'
# column to compare against, so the true ratings need to be joined in first.
mse = mean_squared_error(test_ratings['predicted_rating'], test_ratings['r2'])
print("Mean Squared Error: {:.4f}".format(mse))