# QUESTION 2: PRESENTATION

Importing Libraries

In [4]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
#https://github.com/viruszig-cyber/movie_ecom_system.git

In [5]:
data = pd.read_csv('netflix_titles.csv')

In [6]:
data.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


### EXPLORATORY DATA ANALYSIS

In [7]:
data.isnull().sum()

show_id            0
type               0
title              0
director        2634
cast             825
country          831
date_added        10
release_year       0
rating             4
duration           3
listed_in          0
description        0
dtype: int64

In [8]:
data['type'].unique()

array(['Movie', 'TV Show'], dtype=object)

In [9]:
data['listed_in'].unique()

array(['Documentaries', 'International TV Shows, TV Dramas, TV Mysteries',
       'Crime TV Shows, International TV Shows, TV Action & Adventure',
       'Docuseries, Reality TV',
       'International TV Shows, Romantic TV Shows, TV Comedies',
       'TV Dramas, TV Horror, TV Mysteries', 'Children & Family Movies',
       'Dramas, Independent Movies, International Movies',
       'British TV Shows, Reality TV', 'Comedies, Dramas',
       'Crime TV Shows, Docuseries, International TV Shows',
       'Dramas, International Movies',
       'Children & Family Movies, Comedies',
       'British TV Shows, Crime TV Shows, Docuseries',
       'TV Comedies, TV Dramas', 'Documentaries, International Movies',
       'Crime TV Shows, Spanish-Language TV Shows, TV Dramas',
       'Thrillers',
       'International TV Shows, Spanish-Language TV Shows, TV Action & Adventure',
       'International TV Shows, TV Action & Adventure, TV Dramas',
       'Comedies, International Movies',
       'Comedies, 

In [10]:
import warnings

# Ignore all warnings
def ignore_warnings():
    warnings.filterwarnings("ignore")

# Ignoring warnings after calling the function
ignore_warnings()

Data preprocessing

In [11]:
# Assuming 'Description' is the column containing movie descriptions
data['description'] = data['description'].str.lower().str.replace('[^a-zA-Z#^\w\s]', '')

# Use TF-IDF Vectorizer to convert text descriptions into numerical features
vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(data['description'])

In [12]:
data.head(5)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"as her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","after crossing paths at a party, a cape town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",to protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",in a city of coaching centers known to train i...


In [13]:
data['description'].head(5)

0    as her father nears the end of his life, filmm...
1    after crossing paths at a party, a cape town t...
2    to protect his family from a powerful drug lor...
3    feuds, flirtations and toilet talk go down amo...
4    in a city of coaching centers known to train i...
Name: description, dtype: object

In [14]:
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from collections import Counter

In [19]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Tinashe\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


True

In [20]:
text_column = data['listed_in']

# Tokenize the text and remove stop words
stop_words = set(stopwords.words('english'))
tokenized_text = [word_tokenize(text.lower()) for text in text_column]
filtered_text = [[word for word in tokens if word not in stop_words and word.isalnum()] for tokens in tokenized_text]

Extract and count phrases

In [21]:
def extract_phrases(text_list, n=2):
    ngram_counts = Counter()
    for text in text_list:
        ngrams = zip(*[text[i:] for i in range(n)])
        ngram_counts.update(ngrams)
    return ngram_counts

# Extract and count most common phrases (bigrams or trigrams)
ngram_counts = extract_phrases(filtered_text, n=2)  # Change `n` to 2 for bigrams, 3 for trigrams, etc.

# Display the most common phrases
most_common_phrases = ngram_counts.most_common(10)  # Change `10` to display more or fewer phrases
for phrase, count in most_common_phrases:
    print(f"{phrase}: {count} occurrences")

('tv', 'shows'): 2854 occurrences
('international', 'movies'): 2752 occurrences
('international', 'tv'): 1351 occurrences
('dramas', 'international'): 1219 occurrences
('shows', 'tv'): 1105 occurrences
('action', 'adventure'): 1027 occurrences
('tv', 'dramas'): 763 occurrences
('independent', 'movies'): 756 occurrences
('children', 'family'): 641 occurrences
('family', 'movies'): 641 occurrences


In [36]:

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [37]:
# Count vectorizer
CV = CountVectorizer()
converted_matrix = CV.fit_transform(data['description'])

In [38]:
# Cosine similarity
cosine_similarity = cosine_similarity(converted_matrix)

In [39]:
cosine_similarity.shape

(8807, 8807)

In [43]:
cosine_similarity


array([[1.        , 0.06913011, 0.26681494, ..., 0.15316792, 0.14980118,
        0.33351867],
       [0.06913011, 1.        , 0.03573708, ..., 0.0410305 , 0.        ,
        0.        ],
       [0.26681494, 0.03573708, 1.        , ..., 0.15836152, 0.07744031,
        0.37931034],
       ...,
       [0.15316792, 0.0410305 , 0.15836152, ..., 1.        , 0.13336627,
        0.07918076],
       [0.14980118, 0.        , 0.07744031, ..., 0.13336627, 1.        ,
        0.07744031],
       [0.33351867, 0.        , 0.37931034, ..., 0.07918076, 0.07744031,
        1.        ]])

Clustering movies using K-Means Algorithm

In [22]:
# Perform K-Means clustering
k = 8  # Specify the number of clusters (you can adjust this)
kmeans = KMeans(n_clusters=k, random_state=42)
clusters = kmeans.fit_predict(X)

# Add cluster labels to the DataFrame
data['Cluster'] = clusters

Display movies in respective clusters

In [23]:
# Display movies in each cluster
for cluster_id in range(k):
    print(f"Cluster {cluster_id}:")
    cluster_movies = data[data['Cluster'] == cluster_id]['title']
    for movie in cluster_movies:
        print(f"- {movie}")
    print()  # Add a newline for separation


Cluster 0:
- Crime Stories: India Detectives
- The Father Who Moves Mountains
- Jaws
- Jaws 2
- Training Day
- Lucifer
- The Women and the Murderer
- Show Dogs
- La casa de papel
- In the Cut
- Agatha Christie's Crooked House
- Chappie
- In Too Deep
- Wind River
- Nneka The Pretty Serpent
- The Defeated
- The Kingdom
- Pineapple Express
- Team America: World Police
- Sanitation Day
- La ley de Herodes
- Private Network: Who Killed Manuel Buendía?
- Fear Street Part 2: 1978
- Brick Mansions
- Major Grom: Plague Doctor
- The Mire
- A Land Imagined
- I AM A KILLER
- Marcella
- Scream
- You Are My Spring
- Haseen Dillruba
- Mortel
- Sophie: A Murder in West Cork
- Deadwind
- Elite
- L.A.’s Finest
- Starsky & Hutch
- Black Space
- Small Town Crime
- Who Killed Sara?
- I Am All Girls
- Nayattu
- Sleepless
- Monster
- Green Zone
- The Lovely Bones
- The Unremarkable Juanquini
- Only Mine
- Zoot Suit
- Signal
- Maximum Risk
- Murder Maps
- Wazir
- B: The Beginning
- Catch.er
- Paradise PD
- Th

Display top 5 movies in each cluster

In [24]:
# Determine the number of clusters
num_clusters = len(data['Cluster'].unique())

# Display movies in each cluster
for cluster_id in range(num_clusters):
    print(f"Cluster {cluster_id}:")
    
    # Filter movies belonging to the current cluster
    cluster_movies = data[data['Cluster'] == cluster_id]
    
    # Sort movies in the cluster by a certain criterion (e.g., based on ratings, popularity, etc.)
    sorted_cluster_movies = cluster_movies.sort_values(by='listed_in', ascending=False)  # Example sorting by 'Rating'
    
    # Display top 5 movies in the cluster
    top_movies = sorted_cluster_movies.head(5)['title']  # Display top 5 movies
    for idx, movie in enumerate(top_movies, start=1):
        print(f"{idx}. {movie}")
    
    print()  # Add a newline for separation

Cluster 0:
1. Cop Car
2. Fallen
3. The Drowning
4. Goldstone
5. Along Came a Spider

Cluster 1:
1. Running Out Of Time
2. Synchronic
3. Deadly Illusions
4. Intrusion
5. Domestic Disturbance

Cluster 2:
1. Fractured
2. American Hangman
3. Godzilla
4. Messiah
5. Special

Cluster 3:
1. Bright: The Music Videos
2. Never Have I Ever
3. Eastsiders
4. Master of None
5. Grace and Frankie

Cluster 4:
1. Pretty Guardian Sailor Moon Eternal The Movie
2. Five Came Back: The Reference Films
3. Grand Army
4. The Magicians
5. Tiny Pretty Things

Cluster 5:
1. Rust Creek
2. The Game
3. Fanatic
4. Sightless
5. Perfect Stranger

Cluster 6:
1. Staged Killer
2. The Trust
3. The Student
4. Angels & Demons
5. Sliver

Cluster 7:
1. Bad Match
2. Pretty Little Stalker
3. Killer Cove
4. Inconceivable
5. Stranger Things



In [46]:
data.to_csv('cluster.csv')

Display movies in a single cluster

In [25]:
# Display movies in Cluster 0
cluster_id = 0
cluster_movies = data[data['Cluster'] == cluster_id]['title']

if not cluster_movies.empty:
    print(f"Movies in Cluster {cluster_id}:")
    for movie in cluster_movies:
        print(f"- {movie}")
else:
    print(f"No movies found in Cluster {cluster_id}")

Movies in Cluster 0:
- Crime Stories: India Detectives
- The Father Who Moves Mountains
- Jaws
- Jaws 2
- Training Day
- Lucifer
- The Women and the Murderer
- Show Dogs
- La casa de papel
- In the Cut
- Agatha Christie's Crooked House
- Chappie
- In Too Deep
- Wind River
- Nneka The Pretty Serpent
- The Defeated
- The Kingdom
- Pineapple Express
- Team America: World Police
- Sanitation Day
- La ley de Herodes
- Private Network: Who Killed Manuel Buendía?
- Fear Street Part 2: 1978
- Brick Mansions
- Major Grom: Plague Doctor
- The Mire
- A Land Imagined
- I AM A KILLER
- Marcella
- Scream
- You Are My Spring
- Haseen Dillruba
- Mortel
- Sophie: A Murder in West Cork
- Deadwind
- Elite
- L.A.’s Finest
- Starsky & Hutch
- Black Space
- Small Town Crime
- Who Killed Sara?
- I Am All Girls
- Nayattu
- Sleepless
- Monster
- Green Zone
- The Lovely Bones
- The Unremarkable Juanquini
- Only Mine
- Zoot Suit
- Signal
- Maximum Risk
- Murder Maps
- Wazir
- B: The Beginning
- Catch.er
- Paradi

In [26]:
# Display movies in Cluster 1
cluster_id = 1
cluster_movies = data[data['Cluster'] == cluster_id]['title']

if not cluster_movies.empty:
    print(f"Movies in Cluster {cluster_id}:")
    for movie in cluster_movies:
        print(f"- {movie}")
else:
    print(f"No movies found in Cluster {cluster_id}")

Movies in Cluster 1:
- Ganglands
- Jailbirds New Orleans
- Confessions of an Invisible Girl
- Falsa identidad
- Intrusion
- Resurrection: Ertugrul
- Dark Skies
- Nailed It
- Nightbooks
- Little Singham - Black Shadow
- Tughlaq Durbar
- Tughlaq Durbar (Telugu)
- Omo Ghetto: the Saga
- Shadow Parties
- Gurgaon
- Shikara
- Cold Mountain
- Green Lantern
- HQ Barbers
- Love in a Puff
- Once Upon a Time in America
- Poseidon
- Turning Point: 9/11 and the War on Terror
- Hometown Cha-Cha-Cha
- I Heart Arlo
- Kucch To Hai
- The Dirty Picture
- Post Mortem: No One Dies in Skarnes
- The Water Man
- The Witcher: Nightmare of the Wolf
- The Loud House Movie
- Black Island
- Bombay
- Quartet
- Esperando la carroza
- Friday Night Lights
- Planet 51
- The Haunting in Connecticut 2: Ghosts of Georgia
- The Lincoln Lawyer
- Bartkowiak
- The Flash
- The Snitch Cartel: Origins
- A Second Chance:  Rivals!
- African America
- Bankrolled
- Chhota Bheem Aur Hanuman
- Touch Your Heart
- 2 Weeks in Lagos
- The

In [27]:
# Display movies in Cluster 0
cluster_id = 2
cluster_movies = data[data['Cluster'] == cluster_id]['title']

if not cluster_movies.empty:
    print(f"Movies in Cluster {cluster_id}:")
    for movie in cluster_movies:
        print(f"- {movie}")
else:
    print(f"No movies found in Cluster {cluster_id}")

Movies in Cluster 2:
- Bangkok Breaking
- Jeans
- Minsara Kanavu
- If I Leave Here Tomorrow: A Film About Lynyrd Skynyrd
- El patrón, radiografía de un crimen
- My Boss's Daughter
- Osmosis Jones
- Same Kind of Different as Me
- Untold: Crime & Penalties
- Thimmarusu
- Clickbait
- The River Runner
- Like Crazy
- Gone for Good
- Takizawa Kabuki ZERO 2020 The Movie
- Hit & Run
- The Prince Who Turns into a Frog
- Boyka: Undisputed
- Catch Me If You Can
- Freedomland
- Space Cowboys
- Eyes of a Thief
- Milkwater
- Return of the Prodigal Son
- Ujala
- Holiday on Mars
- Hampstead
- Snow Day
- Prime Time
- A Way Back Home
- Here Comes the Rain
- The Naked Director
- Broken
- Silver Linings Playbook
- Sarbath
- Gold Statue
- Dirty John
- One Lagos Night
- Mad for Each Other
- Special
- Ahaan
- Tottaa Pataaka Item Maal
- Dance of the Forty One
- The Whole Nine Yards
- Seven
- The Innocent
- Durarara!!
- Two Distant Strangers
- Black Is Beltza
- You're Everything To Me
- Sentinelle
- Banyuki
- 

In [28]:
# Display movies in Cluster 3
cluster_id = 3
cluster_movies = data[data['Cluster'] == cluster_id]['title']

if not cluster_movies.empty:
    print(f"Movies in Cluster {cluster_id}:")
    for movie in cluster_movies:
        print(f"- {movie}")
else:
    print(f"No movies found in Cluster {cluster_id}")

Movies in Cluster 3:
- Kota Factory
- Tayo and Little Wizards
- Angry Birds
- Chhota Bheem
- InuYasha the Movie 2: The Castle Beyond the Looking Glass
- Saved by the Bell
- Yowamushi Pedal
- Prey
- Octonauts: Above & Beyond
- Tayo the Little Bus
- Pororo - The Little Penguin
- Barbie Big City Big Dreams
- Janoskians: Untold and Untrue
- Oldsters
- C Kkompany
- Kyaa Kool Hai Hum
- The November Man
- The Secret Diary of an Exchange Student
- Fast & Furious Spy Racers
- Grace and Frankie
- Valeria
- Cocaine Cowboys: The Kings of Miami
- 44 Cats
- My Girl 2
- Open Season: Scared Silly
- Outer Banks
- Mighty Express
- Chhota Bheem - Dinosaur World
- Chhota Bheem & Krishna: Pataliputra- City of the Dead
- Chhota Bheem And The Crown of Valhalla
- Chhota Bheem Ka Roosi Romanch
- Fear Street Part 3: 1666
- My Amanda
- Never Have I Ever
- Ridley Jones
- Finding Hubby
- Cat People
- Some Assembly Required
- Fear Street Part 1: 1994
- Life as We Know It
- No Strings Attached
- #Selfie 69
- Neverth

In [29]:
# Display movies in Cluster 4
cluster_id = 4
cluster_movies = data[data['Cluster'] == cluster_id]['title']

if not cluster_movies.empty:
    print(f"Movies in Cluster {cluster_id}:")
    for movie in cluster_movies:
        print(f"- {movie}")
else:
    print(f"No movies found in Cluster {cluster_id}")

Movies in Cluster 4:
- Europe's Most Dangerous Man: Otto Skorzeny in Spain
- Love on the Spectrum
- InuYasha the Movie: Affections Touching Across Time
- Pokémon Master Journeys: The Series
- Mighty Raju
- Into the Night
- Tobot Galaxy Detectives
- Anjaam
- Hotel Del Luna
- Q-Force
- Brave Animated Series
- How to Be a Cowboy
- Rhyme & Reason
- The Guns of Navarone
- Sparking Joy
- Once Upon a Time in Mumbaai
- Rebellion
- RIDE ON TIME
- Bob Ross: Happy Accidents, Betrayal & Greed
- Really Love
- Oggy Oggy
- Manifest
- Everything Will Be Fine
- Untold: Deal With the Devil
- Lokillo: Nothing's the Same
- Monster Hunter: Legends of the Guild
- The Railway Man
- Heavy
- Centaurworld
- Khawatir
- Myth & Mogul: John DeLorean
- Fantastic Fungi
- Wynonna Earp
- Chhota Bheem aur Krishna vs Zimbara
- Chhota Bheem: The Rise of Kirmada
- Explained
- BEASTARS
- Surf's Up
- My Unorthodox Life
- How I Became a Superhero
- Code Lyoko
- Kim's Convenience
- Larva
- Rehmataan
- The New Legends of Monkey

In [30]:
# Display movies in Cluster 5
cluster_id = 5
cluster_movies = data[data['Cluster'] == cluster_id]['title']

if not cluster_movies.empty:
    print(f"Movies in Cluster {cluster_id}:")
    for movie in cluster_movies:
        print(f"- {movie}")
else:
    print(f"No movies found in Cluster {cluster_id}")

Movies in Cluster 5:
- Dick Johnson Is Dead
- The Starling
- Dear White People
- Ankahi Kahaniya
- Stories by Rabindranath Tagore
- Untold: Breaking Point
- Crocodile Dundee in Los Angeles
- House Party
- Labyrinth
- Major Dad
- Snervous Tyler Oakley
- The Blue Lagoon
- Bread Barbershop
- Kyo Kii... Main Jhuth Nahin Bolta
- Titletown High
- Family Reunion
- Open Your Eyes
- Sweet Girl
- Memories of a Murderer: The Nilsen Tapes
- Quam's Money
- Tango Feroz
- '76
- 30 Rock
- I missed you: Director's Cut
- My Girl
- The Edge of Seventeen
- Valentine's Day
- I'm Glad I Did
- Sexy Beasts
- The Twilight Saga: Breaking Dawn: Part 1
- The Twilight Saga: Breaking Dawn: Part 2
- Her Private Life
- Home Again
- Puffin Rock
- Richie Rich
- Shtisel
- Workin' Moms
- Dennis the Menace
- Mary Magdalene
- Quarantine Tales
- The American
- The Game
- The Karate Kid
- Young Royals
- Droppin' Cash: Los Angeles
- The Daily Life of the Immortal King
- Wonder Boy
- Sex/Life
- Sisters on Track
- No Time for S

In [31]:
# Display movies in Cluster 6
cluster_id = 6
cluster_movies = data[data['Cluster'] == cluster_id]['title']

if not cluster_movies.empty:
    print(f"Movies in Cluster {cluster_id}:")
    for movie in cluster_movies:
        print(f"- {movie}")
else:
    print(f"No movies found in Cluster {cluster_id}")

Movies in Cluster 6:
- Blood & Water
- My Little Pony: A New Generation
- Sankofa
- The Great British Baking Show
- Vendetta: Truth, Lies and The Mafia
- Jaguar
- Monsters Inside: The 24 Faces of Billy Milligan
- Avvai Shanmughi
- Go! Go! Cory Carson: Chrissy Takes the Wheel
- Grown Ups
- Paranoia
- Chicago Party Aunt
- Sex Education
- Squid Game
- The Stronghold
- He-Man and the Masters of the Universe
- Jaws: The Revenge
- My Heroes Were Cowboys
- The Smart Money Woman
- Castle and Castle
- Dharmakshetra
- InuYasha the Movie 3: Swords of an Honorable Ruler
- Naruto Shippuden the Movie: Blood Prison
- Naruto Shippûden the Movie: Bonds
- Naruto Shippûden the Movie: The Will of Fire
- Naruto Shippuden: The Movie
- Naruto Shippuden: The Movie: The Lost Tower
- Naruto the Movie 2: Legend of the Stone of Gelel
- Naruto the Movie 3: Guardians of the Crescent Moon Kingdom
- Naruto the Movie: Ninja Clash in the Land of Snow
- Numberblocks
- Raja Rasoi Aur Anya Kahaniyan
- Schumacher
- Too Hot

In [32]:
# Display movies in Cluster 7
cluster_id = 7
cluster_movies = data[data['Cluster'] == cluster_id]['title']

if not cluster_movies.empty:
    print(f"Movies in Cluster {cluster_id}:")
    for movie in cluster_movies:
        print(f"- {movie}")
else:
    print(f"No movies found in Cluster {cluster_id}")

Movies in Cluster 7:
- Midnight Mass
- Je Suis Karl
- Birth of the Dragon
- Jaws 3
- Safe House
- InuYasha the Movie 4: Fire on the Mystic Island
- Firedrake the Silver Dragon
- Letters to Juliet
- D.P.
- Krishna Cottage
- Kyaa Kool Hain Hum 3
- Man in Love
- The Chair
- A Faraway Land
- O Kadhal Kanmani
- Out of my league
- Pahuna
- The Piano
- La diosa del asfalto
- Chennai Express
- Aftermath
- Autumn's Concerto
- Ije: The Journey
- Two Fathers
- You're My Destiny
- Hunter X Hunter (2011)
- Poms
- Flower Girl
- Blood Red Sky
- Feels Like Ishq
- Kingdom: Ashin of the North
- Chhota Bheem & Krishna: Mayanagari
- Too Hot to Handle: Brazil
- Okupas
- Born to Play
- Mama Drama
- Therapy
- Three Thieves
- Terrace House: Opening New Doors
- Mobile Suit Gundam Hathaway
- Voiceless
- Mommy Issues
- Into the Wind
- The Seventh Day
- Love Is a Story
- Locked Up
- The Judgement
- Kambili: The Whole 30 Yards
- Besieged Bread
- A Man For The Week End
- Security
- The Reason I Jump
- Tragic Jungle

In [33]:
# Display movies in Cluster 8
cluster_id = 8
cluster_movies = data[data['Cluster'] == cluster_id]['title']

if not cluster_movies.empty:
    print(f"Movies in Cluster {cluster_id}:")
    for movie in cluster_movies:
        print(f"- {movie}")
else:
    print(f"No movies found in Cluster {cluster_id}")

No movies found in Cluster 8


In [34]:
import pickle
pickle.dump(data.to_dict(),open('movie_recom_dict.pkl','wb'))


In [42]:
pickle.dump(data.to_dict(),open('cosine_similarity.pkl','wb'))
