In [5]:
import pandas as pd
import numpy as np
import datetime
from surprise import SVD
from surprise import Dataset
from surprise import Reader
from surprise import accuracy
from surprise.model_selection import train_test_split

# Get data from the MovieLense 100k dataset:
dev_file = "../data/ml_100k/ratings.csv"
prod_file = "../data/ml-20m/ratings.csv"
ratings_df = pd.read_csv(dev_file, dtype={
    'userId': np.int32,
    'movieId': np.int32,
    'rating': np.float32,
    'timestamp': np.int32,
})

reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings_df[['userId', 'movieId', 'rating']], reader)

trainset, testset = train_test_split(data, test_size=.25)
algo = SVD()

train_start_time = datetime.datetime.now()
algo.fit(trainset)
train_end_time = datetime.datetime.now()
print("Training duration: " + str(train_end_time - train_start_time))

train_start_time = datetime.datetime.now()
algo.fit(trainset)
train_end_time = datetime.datetime.now()
print("Training duration: " + str(train_end_time - train_start_time))

predictions = algo.test(testset)
accuracy.rmse(predictions)

uid = str(196)  # raw user id (as in the ratings file). They are **strings**!
iid = str(302)  # raw item id (as in the ratings file). They are **strings**!

# get a prediction for specific users and items.
pred = algo.predict(uid, iid, r_ui=4, verbose=True)

Training duration: 0:00:03.341967
Training duration: 0:00:03.328145
RMSE: 0.8787
user: 196        item: 302        r_ui = 4.00   est = 3.50   {'was_impossible': False}


In [4]:
editedAlgo = SVD()
editedAlgo.pu = algo.pu
editedAlgo.qi = algo.qi
editedAlgo.bu = algo.bu
editedAlgo.bi = algo.bi
editedAlgo.lr_all = 0.05

train_start_time = datetime.datetime.now()
editedAlgo.fit(trainset)
train_end_time = datetime.datetime.now()
print("Training duration: " + str(train_end_time - train_start_time))

predictions = editedAlgo.test(testset)
accuracy.rmse(predictions)

Training duration: 0:00:03.347290
RMSE: 0.8826


0.8826205513180468