<a href="https://colab.research.google.com/github/vrjpt10/Movie_recommendation_system/blob/main/movie_recommendation_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive 
# Mount Google Drive at /content/drive so the dataset archive stored there is reachable.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Extract the ml-25m archive from Drive into the working directory
# (-u: only extract files that are new or newer than what is already on disk).
!unzip -u "/content/drive/MyDrive/ml-25m.zip"
# Copy the extracted ml-25m folder back to Drive.
! cp -r ml-25m /content/drive/MyDrive

Archive:  /content/drive/MyDrive/ml-25m.zip
   creating: ml-25m/
  inflating: ml-25m/tags.csv         
  inflating: ml-25m/links.csv        
  inflating: ml-25m/README.txt       
  inflating: ml-25m/ratings.csv      
  inflating: ml-25m/genome-tags.csv  
  inflating: ml-25m/genome-scores.csv  
  inflating: ml-25m/movies.csv       


In [None]:
import pandas as pd
import re

from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

import ipywidgets as widgets
from IPython.display import display


In [None]:
# Load the movie metadata table (movieId, title, genres) from the extracted archive.
movies = pd.read_csv("ml-25m/movies.csv")

In [None]:
# Show the movies table to check that it loaded as expected.
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
62418,209157,We (2018),Drama
62419,209159,Window of the Soul (2001),Documentary
62420,209163,Bad Poems (2018),Comedy|Drama
62421,209169,A Girl Thing (2001),(no genres listed)


Build search engine

In [None]:
def clean_title1(title):
  """Normalize a movie title for text matching.

  Removes every character that is not an ASCII letter, a digit or a space,
  so punctuation such as the parentheses in "Toy Story (1995)" cannot
  influence matching. Spaces are kept on purpose: the cleaned titles are
  vectorized into word uni- and bigrams, which requires word boundaries.

  Args:
    title: Raw title string.

  Returns:
    The cleaned title string.
  """
  # re.sub returns a new string (str is immutable), so the result has to be
  # returned; calling it and discarding the value leaves the title untouched.
  return re.sub("[^a-zA-Z0-9 ]", "", title)

In [None]:
# Add a 'clean_title' column holding clean_title1 applied to every title;
# this is the text the search index is built on.
movies['clean_title'] = movies["title"].apply(clean_title1)

Build the TF-IDF matrix
(term frequency weighted by inverse document frequency, so that rarer, more informative terms count more)

In [None]:
# ngram_range=(1, 2): index single words and pairs of adjacent words.
vectorizer = TfidfVectorizer(ngram_range = (1,2))
# Learn the vocabulary from the cleaned titles and build one TF-IDF row per movie.
tfidf = vectorizer.fit_transform(movies['clean_title'])

In [None]:
def search(title, top_n=5):
  """Return the movies whose titles are most similar to `title`.

  The query is cleaned with clean_title1, projected into the fitted TF-IDF
  space and compared against every indexed title by cosine similarity.

  Args:
    title: Free-text title typed by the user.
    top_n: Maximum number of matches to return (default 5).

  Returns:
    Rows of `movies` ordered from most to least similar, so `.iloc[0]`
    is the best match. Contains at most `top_n` rows.
  """
  title = clean_title1(title)
  query_vec = vectorizer.transform([title])
  similarity = cosine_similarity(query_vec, tfidf).flatten()

  # Never ask for more rows than exist; argpartition raises if kth is out of range.
  top_n = min(top_n, similarity.size)
  if top_n <= 0:
    return movies.iloc[[]]

  # argpartition selects the top_n scores in linear time but leaves them in
  # arbitrary order, so they are sorted explicitly (descending) afterwards.
  candidates = np.argpartition(similarity, -top_n)[-top_n:]
  ordered = candidates[np.argsort(similarity[candidates])[::-1]]
  return movies.iloc[ordered]

In [None]:
# Load the user ratings table (userId, movieId, rating, timestamp).
ratings = pd.read_csv("ml-25m/ratings.csv")

In [None]:
# Show the ratings table to check that it loaded as expected.
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,296,5.0,1147880044
1,1,306,3.5,1147868817
2,1,307,5.0,1147868828
3,1,665,5.0,1147878820
4,1,899,3.5,1147868510
...,...,...,...,...
25000090,162541,50872,4.5,1240953372
25000091,162541,55768,2.5,1240951998
25000092,162541,56176,2.0,1240950697
25000093,162541,58559,4.0,1240953434


In [None]:
# Check the column dtypes; the movieId comparisons below rely on it being numeric.
ratings.dtypes

userId         int64
movieId        int64
rating       float64
timestamp      int64
dtype: object

Finding users who liked the same movie

Creating the recommendation score

In [None]:
def find_similar_movies(movie_id):
  """Recommend movies favoured by the users who liked `movie_id`.

  A "like" is a rating strictly greater than 4. For each candidate movie the
  share of fans of `movie_id` who liked it is divided by the share of the
  wider audience who liked it; a high ratio means the movie is specific to
  this fan group rather than simply popular.

  Args:
    movie_id: movieId of the seed movie.

  Returns:
    DataFrame with columns 'score', 'title' and 'genres' for up to ten
    movies, highest score first.
  """
  liked = ratings['rating'] > 4

  # Users who liked the seed movie, and everything else those users liked.
  fans = ratings.loc[liked & (ratings['movieId'] == movie_id), 'userId'].unique()
  fan_likes = ratings.loc[liked & ratings['userId'].isin(fans), 'movieId']

  # Keep only movies liked by more than 10% of the fans.
  fan_share = fan_likes.value_counts() / len(fans)
  fan_share = fan_share[fan_share > .1]

  # How popular those same movies are among everyone who liked any of them.
  everyone = ratings[liked & ratings['movieId'].isin(fan_share.index)]
  overall_share = everyone['movieId'].value_counts() / len(everyone['userId'].unique())

  table = pd.concat([fan_share, overall_share], axis=1)
  table.columns = ['similar', 'all']
  table['score'] = table['similar'] / table['all']

  best = table.sort_values('score', ascending=False).head(10)
  with_titles = best.merge(movies, left_index=True, right_on='movieId')
  return with_titles[['score', 'title', 'genres']]

Creating the interactive widget

In [None]:
# Text box where the user types a movie title; it starts out as 'Toy Story'.
movie_input_name = widgets.Text(
    value = 'Toy Story',
    description = "Movie Title:",
    disabled = False
)

# Output area that the recommendations are rendered into.
recommendation_list = widgets.Output()

def on_type(data):
  """Refresh the recommendation panel when the text box value changes.

  Args:
    data: Change notification from the widget; data["new"] is the new text.
  """
  with recommendation_list:
    recommendation_list.clear_output()
    typed = data["new"]
    # Queries of three characters or fewer are ignored; the panel stays cleared.
    if len(typed) <= 3:
      return
    # Use the first search hit as the seed movie for the recommendations.
    first_hit = search(typed).iloc[0]
    display(find_similar_movies(first_hit['movieId']))

# Call on_type whenever the text box's 'value' changes, then show both widgets.
movie_input_name.observe(on_type, names = 'value')
display(movie_input_name, recommendation_list)

Text(value='Toy Story', description='Movie Title:')

Output()