In [4]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
# This defines the 'device' variable to avoid the NameError
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device initialized: {device}")


Device initialized: cpu


In [5]:
# Load the dataset (ratings.csv uploaded to Colab)
try:
    df = pd.read_csv('ratings.csv')
    df = df[['userId', 'movieId', 'rating']]

    # Encoding IDs to ensure they are sequential (required for Embedding layers)
    user_encoder = LabelEncoder()
    movie_encoder = LabelEncoder()

    df['user_idx'] = user_encoder.fit_transform(df['userId'])
    df['movie_idx'] = movie_encoder.fit_transform(df['movieId'])

    num_users = df['user_idx'].nunique()
    num_movies = df['movie_idx'].nunique()

    print(f"Number of Unique Users: {num_users}")
    print(f"Number of Unique Movies: {num_movies}")
    print("âœ… Data preprocessing completed successfully.")
except FileNotFoundError:
    print(" Error: ratings.csv not found. Please upload it to the side panel.")

Number of Unique Users: 7107
Number of Unique Movies: 17174
âœ… Data preprocessing completed successfully.


In [6]:
class CollaborativeFilteringNet(nn.Module):
    def __init__(self, n_users, n_items, emb_size=50):
        super(CollaborativeFilteringNet, self).__init__()

        # Creating Embedding layers for Users and Items
        # This treats User IDs and Movie IDs as tokens similar to NLP word tokens
        self.user_embeddings = nn.Embedding(n_users, emb_size)
        self.item_embeddings = nn.Embedding(n_items, emb_size)

        # Initialize weights
        self.user_embeddings.weight.data.uniform_(0, 0.05)
        self.item_embeddings.weight.data.uniform_(0, 0.05)

    def forward(self, user_indices, item_indices):
        # Retrieve the embedding vectors for the given indices
        user_vecs = self.user_embeddings(user_indices)
        item_vecs = self.item_embeddings(item_indices)

        # Perform dot product to calculate the similarity/predicted rating
        dot_product = (user_vecs * item_vecs).sum(1)
        return dot_product

# Instantiate the model
model = CollaborativeFilteringNet(num_users, num_movies).to(device)
print("âœ… Embedding-based model architecture defined.")

âœ… Embedding-based model architecture defined.


In [7]:
# Split data into 80% training and 20% testing
train, test = train_test_split(df, test_size=0.2, random_state=42)

# Convert to PyTorch Tensors and move to device (CPU or GPU)
train_u = torch.LongTensor(train.user_idx.values).to(device)
train_m = torch.LongTensor(train.movie_idx.values).to(device)
train_r = torch.FloatTensor(train.rating.values).to(device)

test_u = torch.LongTensor(test.user_idx.values).to(device)
test_m = torch.LongTensor(test.movie_idx.values).to(device)
test_r = torch.FloatTensor(test.rating.values).to(device)

print("âœ… Data converted to Tensors and ready for training.")

âœ… Data converted to Tensors and ready for training.


In [8]:
criterion = nn.MSELoss() # Mean Squared Error Loss
optimizer = optim.Adam(model.parameters(), lr=0.01)

print("ðŸš€ Starting Training Loop...")
epochs = 10
batch_size = 64 # Processing in batches for efficiency

for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()

    # Forward pass: using a subset for quick demonstration
    # You can use the full set if the computer resources allow
    predictions = model(train_u[:100000], train_m[:100000])
    loss = criterion(predictions, train_r[:100000])

    # Backward pass and optimization
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 2 == 0:
        print(f"Epoch {epoch+1}/{epochs} | Training Loss (MSE): {loss.item():.4f}")

print("ðŸŽ‰ Training finished successfully!")

ðŸš€ Starting Training Loop...
Epoch 2/10 | Training Loss (MSE): 13.2538
Epoch 4/10 | Training Loss (MSE): 12.6384
Epoch 6/10 | Training Loss (MSE): 11.7753
Epoch 8/10 | Training Loss (MSE): 10.6832
Epoch 10/10 | Training Loss (MSE): 9.3912
ðŸŽ‰ Training finished successfully!
