In [2]:
import pandas as pd
import numpy as np
from surprise import Reader, Dataset

In [19]:
# Preprocessed movie table; the first CSV column is used as the DataFrame index.
# The evaluation cell reads its 'id' and 'title' columns to print movie titles.
movies_df = pd.read_csv(
    filepath_or_buffer="./preprocessed_data/movies_preprocessed.csv",
    index_col=0,
)

## **1. Loading and building train set**

In [5]:
# Rating bounds handed to surprise.
# NOTE(review): (0, 10) presumably matches how the preprocessed ratings were
# rescaled -- confirm against the preprocessing step.
reader = Reader(rating_scale=(0, 10))

# Preprocessed ratings; the first CSV column is used as the DataFrame index.
ratings_df = pd.read_csv(
    './preprocessed_data/ratings_small_preprocessed.csv',
    index_col=0,
)

# load_from_df takes the user, item and rating columns, in that order.
data = Dataset.load_from_df(
    ratings_df.loc[:, ['userId', 'movieId', 'rating']],
    reader,
)


In [6]:
# Turn the whole ratings dataset into a surprise trainset (no folds are made).
# NOTE(review): nothing is held out in the cells shown here, so there is no
# test split to measure prediction accuracy against -- confirm this is intended.
trainset = data.build_full_trainset()


## **2. Building a model**

In [7]:
from surprise.prediction_algorithms.knns import KNNWithMeans

# Neighbourhood settings: cosine similarity computed between users
# (user_based=True) rather than between items.
sim_options = dict(name='cosine', user_based=True)

# k is the maximum and min_k the minimum number of neighbours that
# KNNWithMeans aggregates for a single prediction.
k, min_k = 40, 4

KNNalgo = KNNWithMeans(
    k=k,
    min_k=min_k,
    sim_options=sim_options,
)
# Fitting computes the similarity matrix on the training set.
KNNalgo.fit(trainset)


Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x7f880787a320>

## **3. Choosing data for the report**

In [11]:
# Number of ratings per user, most active users first (value_counts defaults
# written out explicitly).
ratings_df['userId'].value_counts(sort=True, ascending=False)

564    886
547    739
15     559
73     546
452    531
      ... 
374      5
227      5
448      4
444      4
663      2
Name: userId, Length: 671, dtype: int64

In [16]:
# Number of ratings per movie, most rated movies first (value_counts defaults
# written out explicitly).
ratings_df['movieId'].value_counts(sort=True, ascending=False)

296     324
318     311
593     304
260     291
480     274
       ... 
3043      1
2486      1
4680      1
4706      1
129       1
Name: movieId, Length: 2781, dtype: int64

In [32]:
# Hand-picked (user, movie) pairs for the report. The evaluation loop zips the
# two lists, so the i-th user is always scored against the i-th movie.
report_pairs = [(480, 414), (260, 11), (15, 296)]
users_id = [user for user, _movie in report_pairs]
movies_id = [movie for _user, movie in report_pairs]

## **4. Model Evaluation**

In [33]:
def lookup_movie_title(movies, movie_id, default='<unknown title>'):
    """Return the title stored in ``movies`` for ``movie_id``.

    Parameters
    ----------
    movies : pandas.DataFrame
        Table with an ``id`` column and a ``title`` column.
    movie_id : int
        Value to look up in ``movies['id']``.
    default : str
        Returned when no row has that id.

    Returns
    -------
    The first matching title, or ``default`` when there is no match.
    """
    # One .loc call instead of chained indexing. Unlike Series.item(), this
    # does not raise when the id matches zero rows or more than one row, so a
    # single bad id cannot abort the whole report.
    titles = movies.loc[movies['id'] == movie_id, 'title']
    return default if titles.empty else titles.iloc[0]


# Print the title and the model's predicted rating for each (user, movie) pair.
# NOTE(review): the movie ids come from the ratings data but are matched
# against movies_df['id'] -- confirm both use the same id space, otherwise the
# printed title may not be the movie that was actually rated.
for user_id, movie_id in zip(users_id, movies_id):
    print(lookup_movie_title(movies_df, movie_id))
    print(KNNalgo.predict(user_id, movie_id))

Batman Forever
user: 480        item: 414        r_ui = None   est = 6.47   {'actual_k': 7, 'was_impossible': False}
Star Wars
user: 260        item: 11         r_ui = None   est = 7.64   {'actual_k': 40, 'was_impossible': False}
Terminator 3: Rise of the Machines
user: 15         item: 296        r_ui = None   est = 7.04   {'actual_k': 40, 'was_impossible': False}
