In [12]:
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from surprise import KNNBasic, NMF
from surprise import Dataset, Reader
from surprise.model_selection import train_test_split
from surprise import accuracy

### preparing rating data

In [3]:
rating_df = pd.read_csv('data/rating_shortened.csv')
rating_df = rating_df[rating_df.rating!=-1]
rating_df = rating_df.groupby(by=['user_id','anime_id']).mean().reset_index()
rating_df.columns = ['user','item','rating']
rating_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1741989 entries, 0 to 1741988
Data columns (total 3 columns):
 #   Column  Dtype  
---  ------  -----  
 0   user    int64  
 1   item    int64  
 2   rating  float64
dtypes: float64(1), int64(2)
memory usage: 39.9 MB


In [4]:
rating_df.to_csv("data/rating_collab_filt.csv", index=False)

### loading rating data

In [5]:
reader = Reader(
    line_format='user item rating', sep=',', skip_lines=1, rating_scale=(2, 3))

course_dataset = Dataset.load_from_file("data/rating_collab_filt.csv", reader=reader)

In [6]:
trainset, testset = train_test_split(course_dataset, test_size=.3)

In [7]:
print(f"Total {trainset.n_users} users and {trainset.n_items} items in the trainingset")

Total 65346 users and 858 items in the trainingset


### KNN based

In [10]:
names=['cosine', 'MSD', 'pearson_baseline']
for name in names:
    model=KNNBasic(sim_options={'user_based': False, 'name':name})
    model.fit(trainset)
    preds=model.test(testset)
    print('-'*30)
    print('Name:', name)
    print(accuracy.rmse(preds))
    print('-'*30)

Computing the cosine similarity matrix...
Done computing similarity matrix.
------------------------------
Name: cosine
RMSE: 5.3645
5.364494522958875
------------------------------
Computing the msd similarity matrix...
Done computing similarity matrix.
------------------------------
Name: MSD
RMSE: 5.3645
5.364489522775506
------------------------------
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
------------------------------
Name: pearson_baseline
RMSE: 5.3645
5.364502104227845
------------------------------


In [11]:
model=KNNBasic(sim_options={'user_based': False})
model.fit(trainset)
preds=model.test(testset)
print('-'*30)
print('Name:', 'default')
print(accuracy.rmse(preds))
print('-'*30)

Computing the msd similarity matrix...
Done computing similarity matrix.
------------------------------
Name: default
RMSE: 5.3645
5.364489522775506
------------------------------


### NMF

In [15]:
model=NMF(verbose=True, random_state=123, n_factors=30)
model.fit(trainset)
preds=model.test(testset)
print('-'*30)
print('NMF with n_factors =', 30)
print(accuracy.rmse(preds))
print('-'*30)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
Processing epoch 20
Processing epoch 21
Processing epoch 22
Processing epoch 23
Processing epoch 24
Processing epoch 25
Processing epoch 26
Processing epoch 27
Processing epoch 28
Processing epoch 29
Processing epoch 30
Processing epoch 31
Processing epoch 32
Processing epoch 33
Processing epoch 34
Processing epoch 35
Processing epoch 36
Processing epoch 37
Processing epoch 38
Processing epoch 39
Processing epoch 40
Processing epoch 41
Processing epoch 42
Processing epoch 43
Processing epoch 44
Processing epoch 45
Processing epoch 46
Processing epoch 47
Processing epoch 48
Processing epoch 49
----------

In [16]:
model=NMF(verbose=True, random_state=123, n_factors=50)
model.fit(trainset)
preds=model.test(testset)
print('-'*30)
print('NMF with n_factors =', 50)
print(accuracy.rmse(preds))
print('-'*30)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
Processing epoch 20
Processing epoch 21
Processing epoch 22
Processing epoch 23
Processing epoch 24
Processing epoch 25
Processing epoch 26
Processing epoch 27
Processing epoch 28
Processing epoch 29
Processing epoch 30
Processing epoch 31
Processing epoch 32
Processing epoch 33
Processing epoch 34
Processing epoch 35
Processing epoch 36
Processing epoch 37
Processing epoch 38
Processing epoch 39
Processing epoch 40
Processing epoch 41
Processing epoch 42
Processing epoch 43
Processing epoch 44
Processing epoch 45
Processing epoch 46
Processing epoch 47
Processing epoch 48
Processing epoch 49
----------

In [17]:
model=NMF(verbose=True, random_state=123, n_factors=100)
model.fit(trainset)
preds=model.test(testset)
print('-'*30)
print('NMF with n_factors =', 100)
print(accuracy.rmse(preds))
print('-'*30)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
Processing epoch 20
Processing epoch 21
Processing epoch 22
Processing epoch 23
Processing epoch 24
Processing epoch 25
Processing epoch 26
Processing epoch 27
Processing epoch 28
Processing epoch 29
Processing epoch 30
Processing epoch 31
Processing epoch 32
Processing epoch 33
Processing epoch 34
Processing epoch 35
Processing epoch 36
Processing epoch 37
Processing epoch 38
Processing epoch 39
Processing epoch 40
Processing epoch 41
Processing epoch 42
Processing epoch 43
Processing epoch 44
Processing epoch 45
Processing epoch 46
Processing epoch 47
Processing epoch 48
Processing epoch 49
----------