## **Import the Libraries and Load Data From Google Drive**

In [1]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix
# Colab-only helper: exposes the user's Google Drive under /content/drive.
from google.colab import drive

drive.mount('/content/drive')

# Read the complete ratings CSV (all columns, default dtypes) from Drive.
# NOTE(review): no later visible cell references `df_ratings`, and the next
# cell reads the same file again into `rating` — confirm this load is needed.
df_ratings = pd.read_csv(
    '/content/drive/My Drive/rating.csv'
)

Mounted at /content/drive


In [2]:
# Load only the three columns the recommender uses, with compact dtypes.
# 'Int32' (capital I) is pandas' nullable integer dtype, whereas 'int32' is
# the NumPy dtype. NOTE(review): confirm that mixing the two is intentional.
rating = pd.read_csv(
    '/content/drive/My Drive/rating.csv',
    usecols=['userId', 'movieId', 'rating'],
    dtype=dict(userId='Int32', movieId='int32', rating='float32'),
)

# Preview the first rows.
rating.head()


Unnamed: 0,userId,movieId,rating
0,1,2,3.5
1,1,29,3.5
2,1,32,3.5
3,1,47,3.5
4,1,50,3.5


In [3]:
# Movie id -> title lookup table.
# NOTE(review): this path is relative to the working directory, unlike
# rating.csv which is read from the mounted Drive — confirm movie.csv is
# available there.
movies = pd.read_csv(
    'movie.csv',
    usecols=['movieId', 'title'],
    dtype=dict(movieId='int32', title='str'),
)


## Data merging



In [4]:
# Attach the movie title to every rating row (inner join on movieId, the
# default join type of merge).
all_in_one = rating.merge(movies, on='movieId')

# Preview the merged frame.
all_in_one.head()

Unnamed: 0,userId,movieId,rating,title
0,1,2,3.5,Jumanji (1995)
1,5,2,3.0,Jumanji (1995)
2,13,2,3.0,Jumanji (1995)
3,29,2,3.0,Jumanji (1995)
4,34,2,3.0,Jumanji (1995)


## Rating Calculation

In [5]:
# `all_in_one` is reused from the "Data merging" cell above; re-running the
# merge here would only repeat an expensive join and produce the same frame.

# Keep only rating rows that have a title.
Calculate_Rating = all_in_one.dropna(axis=0, subset=['title'])

# Number of (non-null) ratings each title received, as a two-column frame
# with columns 'title' and 'totalRatingCount'.
movie_ratingCount = (
    Calculate_Rating
    .groupby(by=['title'])['rating']
    .count()
    .rename('totalRatingCount')
    .reset_index()
)
movie_ratingCount

Unnamed: 0,title,totalRatingCount
0,#chicagoGirl: The Social Network Takes on a Di...,2
1,$ (Dollars) (1971),11
2,$5 a Day (2008),27
3,$9.99 (2008),32
4,$ellebrity (Sellebrity) (2012),2
...,...,...
24219,À nous la liberté (Freedom for Us) (1931),107
24220,À propos de Nice (1930),1
24221,Árido Movie (2005),1
24222,Åsa-Nisse - Wälkom to Knohult (2011),1


## Data Combination

In [6]:
# Add each title's total rating count to every rating row. A left join keeps
# all rating rows; both frames share the key column 'title'.
combine_all = pd.merge(
    Calculate_Rating,
    movie_ratingCount,
    how='left',
    on='title',
)

# Preview the combined frame.
combine_all.head()

Unnamed: 0,userId,movieId,rating,title,totalRatingCount
0,1,2,3.5,Jumanji (1995),13780
1,5,2,3.0,Jumanji (1995),13780
2,13,2,3.0,Jumanji (1995),13780
3,29,2,3.0,Jumanji (1995),13780
4,34,2,3.0,Jumanji (1995),13780


## Filtering Out Movies With Fewer Than 1000 Ratings

In [7]:
# Popularity threshold: a title must have at least this many ratings to be
# kept for the recommender.
minimum_number_of_ratings = 1000

# Keep only the rating rows that belong to sufficiently popular titles.
rating_popular_movie = combine_all.query(
    'totalRatingCount >= @minimum_number_of_ratings'
)

# (rows, columns) of the filtered frame. Only a cell's last expression is
# displayed, so the former mid-cell `.head()` call had no effect and was removed.
rating_popular_movie.shape

(10386435, 5)

## Pivot Table Creation

In [8]:
# Title x user matrix of ratings: one row per title, one column per userId.
# pivot_table aggregates duplicate (title, userId) pairs with its default
# (mean); user/title pairs without a rating are filled with 0.
matrix_pivot = pd.pivot_table(
    rating_popular_movie,
    values='rating',
    index='title',
    columns='userId',
).fillna(0)

In [9]:
# Preview the first five titles of the rating matrix.
matrix_pivot.head(n=5)

userId,1,2,3,4,5,6,7,8,9,10,...,85779,85780,85781,85782,85783,85784,85785,85786,85787,85788
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"'burbs, The (1989)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
(500) Days of Summer (2009),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
*batteries not included (1987),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10 Things I Hate About You (1999),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"10,000 BC (2008)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Sparse Matrix

In [10]:
# Convert the rating matrix to SciPy CSR (compressed sparse row) format so
# the many zero entries are not stored explicitly.
movie_features_df_matrix = csr_matrix(matrix_pivot.to_numpy())

## Nearest Neighbors Algorithm (KNN)

In [11]:
# Unsupervised nearest-neighbour index over the title rows: exhaustive
# (brute-force) search using cosine distance between rating vectors.
model_knn = NearestNeighbors(
    algorithm='brute',
    metric='cosine',
)

# Fit on the sparse title x user matrix.
model_knn.fit(movie_features_df_matrix)

## Choose The Movie

In [12]:
# Ask for the title of the movie to base the recommendations on.
# The following cell looks this title up in `matrix_pivot.index`.
query_movie_title = input(
    "Enter the title of the movie you want to use as the query: "
)


Enter the title of the movie you want to use as the query: Jumanji (1995)


## Movie Recommendations

In [13]:
# Number of recommendations to print. One extra neighbour is requested from
# the model because the query movie is itself part of the fitted data.
n_recommendations = 9

# Tolerate accidental leading/trailing whitespace in the typed title.
query_title = query_movie_title.strip()

if query_title not in matrix_pivot.index:
    # The title comes from free-text user input, so a typo or a movie below the
    # popularity threshold is expected. Report it and offer case-insensitive
    # substring matches instead of failing with a bare KeyError.
    print("Movie '{0}' was not found among the movies in the rating matrix.".format(query_title))
    needle = query_title.lower()
    suggestions = [
        title for title in matrix_pivot.index
        if needle and needle in title.lower()
    ][:10]
    if suggestions:
        print("Did you mean one of: {0}".format("; ".join(suggestions)))
else:
    # Row position of the query movie in the title x user matrix.
    query_index = matrix_pivot.index.get_loc(query_title)

    # kneighbors cannot return more neighbours than there are fitted rows.
    n_neighbors = min(n_recommendations + 1, len(matrix_pivot.index))

    # Use the trained kNN model to find the nearest neighbours of the query movie.
    distances, indices = model_knn.kneighbors(
        matrix_pivot.iloc[query_index, :].values.reshape(1, -1),
        n_neighbors=n_neighbors,
    )

    # Drop the query movie by its index rather than by assuming it is always
    # the first neighbour: titles with identical rating vectors tie at
    # distance 0 and may be returned ahead of it.
    neighbours = [
        (movie_idx, distance)
        for movie_idx, distance in zip(indices.flatten(), distances.flatten())
        if movie_idx != query_index
    ][:n_recommendations]

    # Print the recommended movies along with their distances from the query movie.
    print("Recommendations for {0}:\n".format(matrix_pivot.index[query_index]))
    print("Original movie: {0}".format(matrix_pivot.index[query_index]))
    for rank, (movie_idx, distance) in enumerate(neighbours, start=1):
        print("{0}: {1}, with distance of {2}".format(rank, matrix_pivot.index[movie_idx], distance))

Recommendations for Jumanji (1995):

Original movie: Jumanji (1995)
1: Mask, The (1994), with distance of 0.48002833127975464
2: Mrs. Doubtfire (1993), with distance of 0.4908592104911804
3: Lion King, The (1994), with distance of 0.4915050268173218
4: Home Alone (1990), with distance of 0.5008390545845032
5: Jurassic Park (1993), with distance of 0.5054531097412109
6: Aladdin (1992), with distance of 0.5299914479255676
7: Speed (1994), with distance of 0.5381497144699097
8: Santa Clause, The (1994), with distance of 0.5429033041000366
9: Waterworld (1995), with distance of 0.5507691502571106
