In [1]:
%load_ext Cython

import pandas as pd
import numpy as np
import datetime
from surprise import SVD
from surprise import Dataset
from surprise import Reader
from surprise import accuracy
from surprise.model_selection import train_test_split

In [2]:
from recommender_algo.editable_svd import EditableSVD

In [3]:
# Get data from the MovieLense 100k dataset:
dev_file = "../data/ml_100k/ratings.csv"
prod_file = "../data/ml-20m/ratings.csv"
ratings_df = pd.read_csv(dev_file, dtype={
    'userId': np.int32,
    'movieId': np.int32,
    'rating': np.float32,
    'timestamp': np.int32,
})

reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings_df[['userId', 'movieId', 'rating']], reader)

trainset, testset = train_test_split(data, test_size=.25)

In [4]:
algo = EditableSVD()

train_start_time = datetime.datetime.now()
algo.fit(trainset)
train_end_time = datetime.datetime.now()
print("Training duration: " + str(train_end_time - train_start_time))

predictions = algo.test(testset)
accuracy.rmse(predictions)

Training duration: 0:00:07.625961
RMSE: 0.8765


0.8764575611118396

In [5]:
uid = str(196)  # raw user id (as in the ratings file). They are **strings**!
iid = str(302)  # raw item id (as in the ratings file). They are **strings**!
pred = algo.predict(uid, iid, r_ui=4, verbose=True)

user: 196        item: 302        r_ui = 4.00   est = 3.50   {'was_impossible': False}


In [6]:
rated_items = {302: 5, 1: 2}
algo.fit_new_user(76485793, rated_items)

In [8]:
algo.predict(76485793, 1, r_ui=2, verbose=True)

user: 76485793   item: 1          r_ui = 2.00   est = 3.95   {'was_impossible': False}


Prediction(uid=76485793, iid=1, r_ui=2, est=3.954373754046581, details={'was_impossible': False})