In [1]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.nn.functional as F
from math import sqrt


train_df = pd.read_csv('./data/train.csv')
test_df = pd.read_csv('./data/test.csv')
train_df.drop(['ID','Age', 'Location','Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher'],axis = 1, inplace = True)
test_df.drop(['ID','Age', 'Location','Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher'],axis = 1, inplace = True)
submission_df = pd.read_csv('./data/sample_submission.csv')

In [2]:
user_dict = {user_id : i for i, user_id in enumerate(train_df['User-ID'].unique())}
book_dict = {book_id : i for i, book_id in enumerate(train_df['Book-ID'].unique())}

In [3]:
train_df['User-ID'] = train_df['User-ID'].map(user_dict)
train_df['Book-ID'] = train_df['Book-ID'].map(book_dict)

test_df['User-ID'] = test_df['User-ID'].map(user_dict)
test_df['Book-ID'] = test_df['Book-ID'].map(book_dict)

In [4]:
class RecommenderModel(nn.Module):
    def __init__(self, num_users, num_books, embedding_size = 50):
        super(RecommenderModel, self).__init__()
        self.user_embeddings = nn.Embedding(num_users, embedding_size)
        self.book_embeddings = nn.Embedding(num_books, embedding_size)
        self.fc1 = nn.Linear(embedding_size * 2, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.relu = nn.ReLU()
        
    def forward(self, user, book):
        user_embed = self.user_embeddings(user)
        book_embed = self.book_embeddings(book)
        x = torch.cat([user_embed, book_embed], dim = 1)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = RecommenderModel(num_users = len(user_dict), num_books = len(book_dict)).cuda()

In [5]:
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
loss_fn = torch.nn.MSELoss()

In [6]:
# train model

from tqdm import tqdm, tqdm_notebook

num_epochs = 30
batch_size = 128
for epoch in tqdm(range(num_epochs)):
    train_losses = []
    for i in range(0, len(train_df), batch_size):
        batch = train_df[i:i+batch_size]
        user = torch.tensor(batch['User-ID'].values, dtype = torch.long).cuda()
        book = torch.tensor(batch['Book-ID'].values, dtype = torch.long).cuda()
        rating = torch.tensor(batch['Book-Rating'].values, dtype = torch.float32).cuda()
        
        optimizer.zero_grad()
        output = model(user, book)
        loss = loss_fn(output.view(-1), rating)
        loss.backward()
        optimizer.step()
        
        train_losses.append(loss.item())
        
    print('Epoch : {} Train Loss : {:.4f}'.format(epoch+1, sum(train_losses)/len(train_losses)))

  3%|▎         | 1/30 [00:20<10:00, 20.70s/it]

Epoch : 1 Train Loss : 14.8123


  7%|▋         | 2/30 [00:40<09:26, 20.24s/it]

Epoch : 2 Train Loss : 13.8204


 10%|█         | 3/30 [01:00<08:56, 19.86s/it]

Epoch : 3 Train Loss : 12.5886


 13%|█▎        | 4/30 [01:19<08:31, 19.68s/it]

Epoch : 4 Train Loss : 11.2819


 17%|█▋        | 5/30 [01:38<08:09, 19.60s/it]

Epoch : 5 Train Loss : 10.0907


 20%|██        | 6/30 [01:58<07:49, 19.54s/it]

Epoch : 6 Train Loss : 9.0012


 23%|██▎       | 7/30 [02:17<07:28, 19.50s/it]

Epoch : 7 Train Loss : 7.9934


 27%|██▋       | 8/30 [02:37<07:08, 19.48s/it]

Epoch : 8 Train Loss : 7.0929


 30%|███       | 9/30 [02:56<06:48, 19.47s/it]

Epoch : 9 Train Loss : 6.3032


 33%|███▎      | 10/30 [03:16<06:29, 19.46s/it]

Epoch : 10 Train Loss : 5.6312


 37%|███▋      | 11/30 [03:35<06:10, 19.48s/it]

Epoch : 11 Train Loss : 5.0922


 40%|████      | 12/30 [03:55<05:50, 19.48s/it]

Epoch : 12 Train Loss : 4.6478


 43%|████▎     | 13/30 [04:14<05:30, 19.45s/it]

Epoch : 13 Train Loss : 4.3039


 47%|████▋     | 14/30 [04:33<05:10, 19.43s/it]

Epoch : 14 Train Loss : 4.0878


 50%|█████     | 15/30 [04:53<04:51, 19.43s/it]

Epoch : 15 Train Loss : 3.8556


 53%|█████▎    | 16/30 [05:12<04:31, 19.43s/it]

Epoch : 16 Train Loss : 3.7061


 57%|█████▋    | 17/30 [05:32<04:12, 19.44s/it]

Epoch : 17 Train Loss : 3.5145


 60%|██████    | 18/30 [05:51<03:53, 19.45s/it]

Epoch : 18 Train Loss : 3.3857


 63%|██████▎   | 19/30 [06:11<03:33, 19.45s/it]

Epoch : 19 Train Loss : 3.2317


 67%|██████▋   | 20/30 [06:30<03:14, 19.44s/it]

Epoch : 20 Train Loss : 3.0793


 70%|███████   | 21/30 [06:49<02:54, 19.44s/it]

Epoch : 21 Train Loss : 2.9675


 73%|███████▎  | 22/30 [07:09<02:35, 19.45s/it]

Epoch : 22 Train Loss : 2.8648


 77%|███████▋  | 23/30 [07:28<02:16, 19.46s/it]

Epoch : 23 Train Loss : 2.7641


 80%|████████  | 24/30 [07:48<01:56, 19.46s/it]

Epoch : 24 Train Loss : 2.6416


 83%|████████▎ | 25/30 [08:07<01:37, 19.47s/it]

Epoch : 25 Train Loss : 2.5757


 87%|████████▋ | 26/30 [08:27<01:17, 19.49s/it]

Epoch : 26 Train Loss : 2.5047


 90%|█████████ | 27/30 [08:46<00:58, 19.36s/it]

Epoch : 27 Train Loss : 2.4182


 93%|█████████▎| 28/30 [09:05<00:38, 19.37s/it]

Epoch : 28 Train Loss : 2.3637


 97%|█████████▋| 29/30 [09:25<00:19, 19.44s/it]

Epoch : 29 Train Loss : 2.3108


100%|██████████| 30/30 [09:44<00:00, 19.50s/it]

Epoch : 30 Train Loss : 2.2214





In [7]:
with torch.no_grad():
    user = torch.tensor(test_df['User-ID'].values, dtype = torch.long).cuda()
    book = torch.tensor(test_df['Book-ID'].values, dtype = torch.long).cuda()
    output = model(user, book)
    predicted_ratings = output.cpu().view(-1).tolist()

In [8]:
submission_df['Book-Rating'] = predicted_ratings
submission_df.to_csv('submission2.csv',index = False)