# Testing the logistic matrix factorization code (`logistic_mf`)

In [1]:
# Enable IPython's autoreload extension in mode 2: all imported modules are
# reloaded before each cell executes, so edits to local source files take
# effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F

In [3]:
# Import only the names this notebook uses instead of `*`, so it is clear
# where `MF`, `encode_data`, and `training` come from and nothing else is
# silently pulled into (or shadowed in) the notebook namespace.
from logistic_mf import MF, encode_data, training

In [4]:
# Load the training and validation ratings tables. Both paths are relative,
# so the CSV files are looked up in the kernel's current working directory.
train, valid = (
    pd.read_csv(csv_path)
    for csv_path in ("train_books_ratings.csv", "valid_books_ratings.csv")
)

In [5]:
# Peek at the first five raw validation rows (user, item, rating, timestamp).
valid.head(n=5)

Unnamed: 0,user,item,rating,timestamp
0,A2E2F4MLVYDGEQ,000100039X,0,1393286400
1,A386A9WE42M4PG,000100039X,0,1371772800
2,A1OGQA984MTKBH,000100039X,0,1372118400
3,A1VVBHGM8DFIZ4,000100039X,0,1387152000
4,AD6E4Y092Y4KP,000100039X,0,1392336000


In [6]:
# Encode the training ratings. `train=None` is the call form used for the
# training split itself; the validation split is encoded with `train=train`.
# NOTE(review): encode_data comes from logistic_mf and is not shown here —
# presumably it maps raw user/item strings to integer ids; confirm.
train_df = encode_data(train, train=None)

In [7]:
# Encode the validation ratings, supplying the raw training frame as `train`.
# The displayed result has integer user/item columns and a non-contiguous
# index, which suggests some rows were dropped.
# NOTE(review): presumably rows whose user/item never appears in training are
# removed — confirm in logistic_mf.encode_data.
valid_df = encode_data(valid, train=train)

In [8]:
# Inspect the last five encoded validation rows.
valid_df.tail(n=5)

Unnamed: 0,user,item,rating,timestamp
446156,710749,45868,1,1333584000
446159,590160,45082,1,1335139200
446161,910780,62851,1,1057881600
446163,764823,32666,1,1241308800
446166,345398,325600,1,1378598400


In [9]:
# Count the distinct user ids and item ids in the encoded training frame;
# these are the two sizes passed to MF when the model is built.
num_users, num_items = (
    len(train_df[column].unique()) for column in ("user", "item")
)
print(num_users, num_items)

1312778 659279


In [10]:
# Build a model from the two counts only, leaving every other MF argument at
# its default. This instance is used for the quick forward-pass check and is
# replaced by a new model before training.
model = MF(num_users, num_items)

In [11]:
# First five (user, item) id pairs from the encoded training data, as int64
# tensors for a quick forward-pass check.
# `torch.tensor(..., dtype=torch.long)` replaces the legacy type-specific
# `torch.LongTensor(...)` constructor and states the dtype explicitly.
# Bracket access is used for the columns because `item` is also the name of a
# pandas method, which makes attribute-style access fragile.
u = torch.tensor(train_df["user"].values[:5], dtype=torch.long)
v = torch.tensor(train_df["item"].values[:5], dtype=torch.long)

In [12]:
# Smoke test: run the model on the five sampled (user, item) pairs. The
# displayed result is one value per pair, and its grad_fn shows that the last
# operation is a sigmoid.
model(u, v)

tensor([0.5128, 0.5193, 0.5154, 0.5170, 0.5178], grad_fn=<SigmoidBackward>)

In [13]:
# Seed PyTorch's global RNG before building the model that is actually
# trained, so its starting point is the same on every fresh-kernel run.
# NOTE(review): assumes MF draws its initial parameters from torch's global
# RNG; if `training` adds further randomness it needs its own handling.
torch.manual_seed(0)
# Fresh model with emb_size=100, replacing the one used for the smoke test.
model = MF(num_users, num_items, emb_size=100)

In [14]:
# Stage 1: 10 epochs with lr=0.1 and wd=1e-5 (`wd` is presumably weight decay —
# confirm in logistic_mf.training). In the log, validation accuracy swings
# between 0.429 and 0.764 from epoch to epoch at this learning rate.
training(model, train_df, valid_df, epochs=10, lr=0.1, wd=1e-5)

train loss 0.694 valid loss 0.722 valid acc 0.429
train loss 0.674 valid loss 0.639 valid acc 0.764
train loss 0.624 valid loss 0.639 valid acc 0.741
train loss 0.643 valid loss 0.655 valid acc 0.656
train loss 0.663 valid loss 0.641 valid acc 0.675
train loss 0.651 valid loss 0.623 valid acc 0.761
train loss 0.630 valid loss 0.617 valid acc 0.757
train loss 0.615 valid loss 0.619 valid acc 0.681
train loss 0.609 valid loss 0.619 valid acc 0.677
train loss 0.606 valid loss 0.615 valid acc 0.748


In [15]:
# Stage 2: call `training` again on the same `model` object for 15 epochs with
# a 10x smaller lr (0.01) and the same wd.
# NOTE(review): this continues from the stage-1 weights only if `training`
# does not re-initialise the model — confirm in logistic_mf.
# In the log, validation accuracy settles at about 0.774 and validation loss
# at about 0.614.
training(model, train_df, valid_df, epochs=15, lr=0.01, wd=1e-5)

train loss 0.608 valid loss 0.613 valid acc 0.766
train loss 0.611 valid loss 0.614 valid acc 0.772
train loss 0.615 valid loss 0.614 valid acc 0.772
train loss 0.618 valid loss 0.615 valid acc 0.771
train loss 0.621 valid loss 0.616 valid acc 0.768
train loss 0.622 valid loss 0.616 valid acc 0.768
train loss 0.622 valid loss 0.616 valid acc 0.771
train loss 0.621 valid loss 0.615 valid acc 0.773
train loss 0.619 valid loss 0.615 valid acc 0.774
train loss 0.618 valid loss 0.614 valid acc 0.774
train loss 0.616 valid loss 0.614 valid acc 0.774
train loss 0.615 valid loss 0.614 valid acc 0.774
train loss 0.614 valid loss 0.614 valid acc 0.774
train loss 0.614 valid loss 0.614 valid acc 0.774
train loss 0.614 valid loss 0.614 valid acc 0.773
