In [1]:
import pandas
df = pandas.read_csv('ratings_small.csv')
df_movies = pandas.read_csv('tmdb_5000_movies.csv')
ids = df_movies['id'].tolist()
df = df[df['movieId'].isin(ids)]
df = df.rename(columns={'userId': 'user', 'movieId': 'item'})
df

Unnamed: 0,user,item,rating,timestamp
13,1,2105,4.0,1260759139
16,1,2294,2.0,1260759108
26,2,62,3.0,835355749
30,2,153,4.0,835355441
31,2,161,3.0,835355493
...,...,...,...,...
99983,671,4995,4.0,1064891537
99993,671,5902,3.5,1064245507
100004,1,111,5.0,1609284358
100005,1,111,5.0,1609286487


In [2]:
from surprise import Dataset
from surprise import Reader
reader = Reader(rating_scale=(1, 5))

In [3]:
data = Dataset.load_from_df(df[["user", "item", "rating"]], reader)
data

<surprise.dataset.DatasetAutoFolds at 0x7fa17e13c5b0>

In [4]:
from surprise import KNNWithMeans
sim_options = {
    "name": "pearson",
    "user_based": False
}
algo = KNNWithMeans(sim_options=sim_options)

In [5]:
trainset = data.build_full_trainset()
trainset_iids = list(trainset.all_items())
iid_converter = lambda x: trainset.to_raw_iid(x)
trainset_raw_iids = list(map(iid_converter, trainset_iids))

In [6]:
algo.fit(trainset)

Computing the pearson similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x7fa17e13ca30>

In [7]:
def get_neighbors(itemId, N=5):
    if itemId not in trainset_raw_iids:
        return []
    iid = trainset_raw_iids.index(itemId)
    neighbors_iids = algo.get_neighbors(iid, N)
    ids = list(map(lambda value: trainset_raw_iids[value], neighbors_iids))
    return ids

#neighbors = get_neighbors(182)
df_movies['neighborhood'] = df_movies['id'].copy().apply(get_neighbors)

In [14]:
from surprise.model_selection import train_test_split, cross_validate
results = cross_validate(
    algo = algo, data = data, measures=['MAE', 'RMSE', 'MSE'], 
    cv=5, return_train_measures=True, verbose=False
)

print() 
print('Root Mean Squared Error', results['test_rmse'].mean())
print('Mean Squared Error', results['test_mse'].mean())
print('Mean Absolute Error', results['test_mae'].mean())

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.

Root Mean Squared Error 0.9344590273176113
Mean Squared Error 0.8732582050011887
Mean Absolute Error 0.7122368503414603
