In [110]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


In [111]:
df = pd.read_csv('/content/sample_data/movies.csv')

In [112]:
df.head()

Unnamed: 0,movieId,title,genres,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,,,,,,,
1,2,Jumanji (1995),Adventure|Children|Fantasy,,,,,,,
2,3,Grumpier Old Men (1995),Comedy|Romance,,,,,,,
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,,,,,,,
4,5,Father of the Bride Part II (1995),Comedy,,,,,,,


In [113]:
import nltk as nltk

In [114]:
df['genres'][0]

'Adventure|Animation|Children|Comedy|Fantasy'

In [115]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=9743,stop_words='english')

In [116]:
vectors = cv.fit_transform(df['genres']).toarray()
vectors

array([[0, 1, 1, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [1, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [117]:
vectors[0]

array([0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0])

In [118]:
import nltk

In [119]:
from nltk.stem.porter import PorterStemmer
ps = PorterStemmer()

In [120]:
def stem(text):
  y=[]
  for i in text.split():
    y.append(ps.stem(i))
  return " ".join(y)

In [121]:
df['genres'] = df['genres'].apply(stem)

In [122]:
ps.stem('dance')

'danc'

In [123]:
df['genres'][1200]

'crime|drama|thril'

In [124]:
cv.get_feature_names_out()

array(['action', 'adventure', 'animation', 'children', 'comedy', 'crime',
       'documentary', 'drama', 'fantasy', 'fi', 'film', 'genres',
       'horror', 'imax', 'listed', 'musical', 'mystery', 'noir',
       'romance', 'sci', 'thriller', 'war', 'western'], dtype=object)

In [125]:
from sklearn.metrics.pairwise import cosine_similarity

In [126]:
similarity = cosine_similarity(vectors)

In [127]:
df.head()

Unnamed: 0,movieId,title,genres,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,1,Toy Story (1995),adventure|animation|children|comedy|fantasi,,,,,,,
1,2,Jumanji (1995),adventure|children|fantasi,,,,,,,
2,3,Grumpier Old Men (1995),comedy|rom,,,,,,,
3,4,Waiting to Exhale (1995),comedy|drama|rom,,,,,,,
4,5,Father of the Bride Part II (1995),comedi,,,,,,,


In [128]:
df = df[['movieId','title','genres']]

In [132]:
df.tail()

Unnamed: 0,movieId,title,genres
9737,193581,Black Butler: Book of the Atlantic (2017),action|animation|comedy|fantasi
9738,193583,No Game No Life: Zero (2017),animation|comedy|fantasi
9739,193585,Flint (2017),drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),action|anim
9741,193609,Andrew Dice Clay: Dice Rules (1991),comedi


In [129]:
movie_index = df[df['title'] == 'Toy Story (1995)']
movie_index

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),adventure|animation|children|comedy|fantasi


In [130]:
def recommend(movie):
    if 'title' in df.columns:
        movie_index = df[df['title'] == movie]

        if not movie_index.empty:
            movie_index = movie_index.index[0]
            dist = similarity[movie_index]
            sorted_cosine_similarities = sorted(enumerate(dist), reverse=True, key=lambda x: x[1])[1:10]

            for i, similarity_score in sorted_cosine_similarities:
                similar_movie = df.iloc[i]['title']
                print(f"Movie: {similar_movie}, Similarity Score: {similarity_score}")
        else:
            print(f"Movie '{movie}' not found in the dataset.")
    else:
        print("The 'title' column is not present in the DataFrame.")

In [133]:
recommend('Flint (2017)')

Movie: Othello (1995), Similarity Score: 1.0
Movie: Dangerous Minds (1995), Similarity Score: 1.0
Movie: Cry, the Beloved Country (1995), Similarity Score: 1.0
Movie: Restoration (1995), Similarity Score: 1.0
Movie: Georgia (1995), Similarity Score: 1.0
Movie: Home for the Holidays (1995), Similarity Score: 1.0
Movie: Mr. Holland's Opus (1995), Similarity Score: 1.0
Movie: Boys of St. Vincent, The (1992), Similarity Score: 1.0
Movie: Basketball Diaries, The (1995), Similarity Score: 1.0
