## Load Movies data file

In [3]:
import pandas as pd
# Read the movies table (columns: movieId, title, genres) from the local CSV file.
movies_csv = r"C:\Users\91992\Downloads\movies.csv"
movies = pd.read_csv(movies_csv)

# Plain-text preview of the first rows as a quick sanity check of the load.
print(movies.head())

   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  


# Preprocess movie titles

In [4]:
import re

# Matches every character that is NOT an ASCII letter, a digit, or a space.
_TITLE_NOISE = re.compile("[^a-zA-Z0-9 ]")


def clean_title(title):
    """Strip punctuation and other symbols from a movie title.

    Every character other than ``a-z``, ``A-Z``, ``0-9`` and the space
    character is removed; nothing is inserted in its place, so existing
    spaces are kept exactly as they were.

    Parameters
    ----------
    title : str
        Raw title text, e.g. ``"Toy Story (1995)"``.

    Returns
    -------
    str
        The title with the unwanted characters deleted,
        e.g. ``"Toy Story 1995"``.
    """
    return _TITLE_NOISE.sub("", title)

# Add a "clean_title" column holding the punctuation-free version of each title.
movies["clean_title"] = movies["title"].map(clean_title)

# Plain-text dump of the table so the new column can be inspected.
print(movies)

       movieId                               title  \
0            1                    Toy Story (1995)   
1            2                      Jumanji (1995)   
2            3             Grumpier Old Men (1995)   
3            4            Waiting to Exhale (1995)   
4            5  Father of the Bride Part II (1995)   
...        ...                                 ...   
62418   209157                           We (2018)   
62419   209159           Window of the Soul (2001)   
62420   209163                    Bad Poems (2018)   
62421   209169                 A Girl Thing (2001)   
62422   209171      Women of Devil's Island (1962)   

                                            genres  \
0      Adventure|Animation|Children|Comedy|Fantasy   
1                       Adventure|Children|Fantasy   
2                                   Comedy|Romance   
3                             Comedy|Drama|Romance   
4                                           Comedy   
...                        

## TF-IDF Vectorizer

In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# TF-IDF model over single words and adjacent word pairs (unigrams + bigrams).
NGRAM_RANGE = (1, 2)
vectorizer = TfidfVectorizer(ngram_range=NGRAM_RANGE)

# Fit on the cleaned titles and keep the resulting TF-IDF matrix
# (one row per movie title) for later similarity lookups.
tfidf = vectorizer.fit_transform(movies["clean_title"])


def search(title, top_n=5):
    """Return the movies whose cleaned titles best match ``title``.

    The query is cleaned with ``clean_title``, transformed with the fitted
    ``vectorizer``, and compared against every row of ``tfidf`` using cosine
    similarity.

    Parameters
    ----------
    title : str
        Free-text movie title to look up.
    top_n : int, default 5
        Maximum number of rows to return. Fewer rows are returned when the
        table holds fewer than ``top_n`` movies.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows of ``movies``, ordered from most to least
        similar.
    """
    # Clean the query the same way the indexed titles were cleaned.
    query_vec = vectorizer.transform([clean_title(title)])

    # One similarity score per movie row.
    similarity = cosine_similarity(query_vec, tfidf).flatten()

    # Clamp k so argpartition never receives an out-of-range kth.
    k = min(top_n, similarity.size)
    if k <= 0:
        return movies.iloc[0:0]

    # argpartition only guarantees that the k largest scores occupy the last
    # k positions; their relative order is unspecified, so rank them
    # explicitly before returning.
    candidates = np.argpartition(similarity, -k)[-k:]
    ranked = candidates[np.argsort(similarity[candidates])[::-1]]

    return movies.iloc[ranked]


In [6]:
import ipywidgets as widgets
from IPython.display import display

# Editable text box for the movie title, pre-filled with a sample query.
movie_input = widgets.Text(value='Toy Story', description='Movie Title:', disabled=False)

# Output area that the search results are rendered into.
movie_list = widgets.Output()

# Function to handle typing in the text input
def on_type(data):
    """Refresh the results area in response to a change in the text box.

    Parameters
    ----------
    data : mapping
        Change notification passed by the widget observer; its ``"new"``
        entry is read as the current text of the input.

    The output area is always cleared first. A search is run and displayed
    only when the text is longer than 5 characters.
    """
    with movie_list:
        movie_list.clear_output()  # drop whatever was shown for the previous input
        typed_text = data["new"]
        if len(typed_text) <= 5:
            # Too short to search on; leave the output area empty.
            return
        display(search(typed_text))

# Call on_type whenever the text box's 'value' changes (i.e. as the user types).
movie_input.observe(on_type, names='value')

# Render the text box followed by the results area beneath it.
display(movie_input, movie_list)


Text(value='Toy Story', description='Movie Title:')

Output()

In [2]:
! pip install ipywidgets 
 # library to make search box

