In [1]:
from surprise import KNNWithMeans
from surprise import Dataset
from surprise import accuracy
from surprise.model_selection import train_test_split

In [2]:
# Load the movielens-100k dataset  UserID::MovieID::Rating::Timestamp
data = Dataset.load_builtin('ml-100k')
trainset, testset = train_test_split(data, test_size=.15)

In [3]:
# Use user_based true/false to switch between user-based or item-based collaborative filtering
algo = KNNWithMeans(k=50, sim_options={'name': 'pearson_baseline', 'user_based': True})
algo.fit(trainset)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x7fd1cc5c1940>

In [4]:
# we can now query for specific predicions
uid = str(196)  # raw user id
iid = str(302)  # raw item id

In [5]:
# get a prediction for specific users and items.
pred = algo.predict(uid, iid, verbose=True)

user: 196        item: 302        r_ui = None   est = 4.27   {'actual_k': 50, 'was_impossible': False}


In [6]:
# run the trained model against the testset
test_pred = algo.test(testset)

In [7]:
test_pred

[Prediction(uid='793', iid='240', r_ui=4.0, est=3.076209815099296, details={'actual_k': 50, 'was_impossible': False}),
 Prediction(uid='490', iid='1383', r_ui=1.0, est=2.9216686674669865, details={'actual_k': 1, 'was_impossible': False}),
 Prediction(uid='354', iid='479', r_ui=4.0, est=4.364432662659559, details={'actual_k': 50, 'was_impossible': False}),
 Prediction(uid='618', iid='367', r_ui=3.0, est=3.317864650475782, details={'actual_k': 50, 'was_impossible': False}),
 Prediction(uid='267', iid='128', r_ui=5.0, est=4.218866748186778, details={'actual_k': 35, 'was_impossible': False}),
 Prediction(uid='830', iid='228', r_ui=3.0, est=4.008141670973153, details={'actual_k': 50, 'was_impossible': False}),
 Prediction(uid='92', iid='171', r_ui=4.0, est=3.8266135680456825, details={'actual_k': 38, 'was_impossible': False}),
 Prediction(uid='637', iid='741', r_ui=1.0, est=2.359369055317309, details={'actual_k': 25, 'was_impossible': False}),
 Prediction(uid='593', iid='274', r_ui=3.0, est

In [8]:
# get RMSE
print("User-based Model : Test Set")
accuracy.rmse(test_pred, verbose=True)

User-based Model : Test Set
RMSE: 0.9302


0.930226112057395

In [9]:
A = [1, 7, 8, 4, 0, 4, 2, 0.3]
B = [4, 8, 3, 9, 5, 0.4, 3, 1]

In [11]:
import numpy as np

In [15]:
A = np.array(A)
B = np.array(B)

In [16]:
A*B

array([ 4. , 56. , 24. , 36. ,  0. ,  1.6,  6. ,  0.3])

In [18]:
np.sqrt(sum(A*A))

12.251122397560152

In [19]:
np.sqrt(sum(B*B))

14.323407415835103

In [23]:
from numpy.linalg import norm

In [24]:
np.dot(A,B)/(norm(A)*norm(B))

0.7288670550725382

array([0.0227949 , 0.31912866, 0.13676942, 0.20515414, 0.        ,
       0.00911796, 0.03419236, 0.00170962])