In [4]:
import pandas as pd
import numpy as np
import pickle
from IPython.display import display, Image
import ipywidgets as widgets


In [5]:
# ✅ Load pickled movie data and similarity matrix
movies = pickle.load(open("movie_list.pkl", "rb"))
similarity = pickle.load(open("similarity.pkl", "rb"))


In [6]:
# Load movie and credit datasets
movies_df = pd.read_csv('tmdb_5000_movies.csv')
credits_df = pd.read_csv('tmdb_5000_credits.csv')

# Preview the datasets
movies_df.head(), credits_df.head()


(      budget                                             genres  \
 0  237000000  [{"id": 28, "name": "Action"}, {"id": 12, "nam...   
 1  300000000  [{"id": 12, "name": "Adventure"}, {"id": 14, "...   
 2  245000000  [{"id": 28, "name": "Action"}, {"id": 12, "nam...   
 3  250000000  [{"id": 28, "name": "Action"}, {"id": 80, "nam...   
 4  260000000  [{"id": 28, "name": "Action"}, {"id": 12, "nam...   
 
                                        homepage      id  \
 0                   http://www.avatarmovie.com/   19995   
 1  http://disney.go.com/disneypictures/pirates/     285   
 2   http://www.sonypictures.com/movies/spectre/  206647   
 3            http://www.thedarkknightrises.com/   49026   
 4          http://movies.disney.com/john-carter   49529   
 
                                             keywords original_language  \
 0  [{"id": 1463, "name": "culture clash"}, {"id":...                en   
 1  [{"id": 270, "name": "ocean"}, {"id": 726, "na...                en   
 2 

In [7]:
# Merge movies and credits on the title
movies_df = movies_df.merge(credits_df, on='title')

# Keep only useful columns
movies_df = movies_df[['movie_id', 'title', 'overview', 'genres', 'keywords', 'cast', 'crew']]

movies_df.head()


Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew
0,19995,Avatar,"In the 22nd century, a paraplegic Marine is di...","[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...","[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,285,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...","[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...","[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...","[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,206647,Spectre,A cryptic message from Bond’s past sends him o...,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...","[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."
3,49026,The Dark Knight Rises,Following the death of District Attorney Harve...,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...","[{""id"": 849, ""name"": ""dc comics""}, {""id"": 853,...","[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de..."
4,49529,John Carter,"John Carter is a war-weary, former military ca...","[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 818, ""name"": ""based on novel""}, {""id"":...","[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de..."


In [8]:
import ast

# Function to extract 'name' fields
def extract_names(obj):
    try:
        items = ast.literal_eval(obj)
        return [item['name'] for item in items]
    except:
        return []

# Function to extract director from crew
def extract_director(obj):
    try:
        items = ast.literal_eval(obj)
        return [item['name'] for item in items if item['job'] == 'Director']
    except:
        return []

# Apply functions
movies_df['genres'] = movies_df['genres'].apply(extract_names)
movies_df['keywords'] = movies_df['keywords'].apply(extract_names)
movies_df['cast'] = movies_df['cast'].apply(lambda x: extract_names(x)[:3])  # top 3 cast
movies_df['crew'] = movies_df['crew'].apply(extract_director)

movies_df.head()


Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew
0,19995,Avatar,"In the 22nd century, a paraplegic Marine is di...","[Action, Adventure, Fantasy, Science Fiction]","[culture clash, future, space war, space colon...","[Sam Worthington, Zoe Saldana, Sigourney Weaver]",[James Cameron]
1,285,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...","[Adventure, Fantasy, Action]","[ocean, drug abuse, exotic island, east india ...","[Johnny Depp, Orlando Bloom, Keira Knightley]",[Gore Verbinski]
2,206647,Spectre,A cryptic message from Bond’s past sends him o...,"[Action, Adventure, Crime]","[spy, based on novel, secret agent, sequel, mi...","[Daniel Craig, Christoph Waltz, Léa Seydoux]",[Sam Mendes]
3,49026,The Dark Knight Rises,Following the death of District Attorney Harve...,"[Action, Crime, Drama, Thriller]","[dc comics, crime fighter, terrorist, secret i...","[Christian Bale, Michael Caine, Gary Oldman]",[Christopher Nolan]
4,49529,John Carter,"John Carter is a war-weary, former military ca...","[Action, Adventure, Science Fiction]","[based on novel, mars, medallion, space travel...","[Taylor Kitsch, Lynn Collins, Samantha Morton]",[Andrew Stanton]


In [9]:
# Combine all selected fields into one text field
def combine_data(row):
    return ' '.join(row['genres']) + ' ' + \
           ' '.join(row['keywords']) + ' ' + \
           ' '.join(row['cast']) + ' ' + \
           ' '.join(row['crew']) + ' ' + \
           str(row['overview'])

# Create new column
movies_df['tags'] = movies_df.apply(combine_data, axis=1)

# Keep only required columns for the model
final_df = movies_df[['movie_id', 'title', 'tags']]

final_df.head()


Unnamed: 0,movie_id,title,tags
0,19995,Avatar,Action Adventure Fantasy Science Fiction cultu...
1,285,Pirates of the Caribbean: At World's End,Adventure Fantasy Action ocean drug abuse exot...
2,206647,Spectre,Action Adventure Crime spy based on novel secr...
3,49026,The Dark Knight Rises,Action Crime Drama Thriller dc comics crime fi...
4,49529,John Carter,Action Adventure Science Fiction based on nove...


In [10]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Convert to lowercase
final_df['tags'] = final_df['tags'].str.lower()

# Initialize vectorizer
vectorizer = CountVectorizer(max_features=5000, stop_words='english')

# Convert tags to vectors
vectors = vectorizer.fit_transform(final_df['tags']).toarray()

# Compute cosine similarity
similarity_scores = cosine_similarity(vectors)

similarity_scores.shape  # Should return something like (4806, 4806)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df['tags'] = final_df['tags'].str.lower()


(4809, 4809)

In [23]:
from IPython.display import Image, display

def recommend(movie_title):
    try:
        index = movies[movies['title'].str.lower() == movie_title.lower()].index[0]
    except IndexError:
        print("❌ Movie not found.")
        return

    distances = similarity[index]
    movie_indices = sorted(list(enumerate(distances)), reverse=True, key=lambda x: x[1])[1:6]

    for i in movie_indices:
        movie_id = movies.iloc[i[0]].movie_id
        title = movies.iloc[i[0]].title
        poster_url = fetch_poster_by_title(title)


        print(f"🎬 {title}")
        if poster_url:
            display(Image(url=poster_url))
        else:
            print("🚫 Poster not available.")


In [12]:
movies = pickle.load(open("movie_list.pkl", "rb"))
similarity = pickle.load(open("similarity.pkl", "rb"))


In [13]:
recommend("Avatar")


❌ Failed: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))
🎬 Aliens
🚫 Poster not available.
🎬 Moonraker


❌ Failed: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))
🎬 Alien
🚫 Poster not available.
❌ Failed: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))
🎬 Alien³
🚫 Poster not available.
❌ Failed: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))
🎬 Silent Running
🚫 Poster not available.


In [14]:
import pickle

# Save movie list (with titles and ids)
pickle.dump(final_df, open('movie_list.pkl', 'wb'))

# Save similarity matrix
pickle.dump(similarity_scores, open('similarity.pkl', 'wb'))


In [15]:
# Load previously saved pickle files
movies = pickle.load(open('movie_list.pkl', 'rb'))
similarity = pickle.load(open('similarity.pkl', 'rb'))

# Show the first few movie titles
movies['title'].head()



0                                      Avatar
1    Pirates of the Caribbean: At World's End
2                                     Spectre
3                       The Dark Knight Rises
4                                 John Carter
Name: title, dtype: object

In [16]:
import pickle

# Load your saved movie data and similarity matrix
movies = pickle.load(open('movie_list.pkl', 'rb'))
similarity = pickle.load(open('similarity.pkl', 'rb'))


In [17]:
import ipywidgets as widgets
from IPython.display import display

movie_selector = widgets.Dropdown(
    options=movies['title'].values,
    description='Movie:',
    layout=widgets.Layout(width='50%')
)

display(movie_selector)


Dropdown(description='Movie:', layout=Layout(width='50%'), options=('Avatar', "Pirates of the Caribbean: At Wo…

In [22]:
def fetch_poster_by_title(title):
    try:
        url = f"https://api.themoviedb.org/3/search/movie?api_key={API_KEY}&query={title}"
        response = requests.get(url, timeout=5)
        data = response.json()
        results = data.get('results')

        if results:
            poster_path = results[0].get('poster_path')
            if poster_path:
                return f"https://image.tmdb.org/t/p/w500{poster_path}"
    except Exception as e:
        print(f"❌ Error: {e}")

    return "https://via.placeholder.com/200x300?text=No+Poster"


In [19]:
import requests

api_key = 'b76d821cbe9b6f7658da4cdc03826145'
test_url = f"https://api.themoviedb.org/3/movie/19995?api_key={api_key}&language=en-US"  # 19995 = Avatar (2009)

try:
    response = requests.get(test_url, timeout=5)
    data = response.json()
    print("✅ Success:", data.get('title'))
    print("Poster path:", data.get('poster_path'))
except Exception as e:
    print("❌ Failed:", e)


❌ Failed: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))


In [20]:
print(fetch_poster(550))


❌ Failed: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))
None


In [21]:
recommend("Inception")
recommend("Titanic")
recommend("Iron Man")


❌ Failed: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))
🎬 The Helix... Loaded
🚫 Poster not available.
🎬 The Count of Monte Cristo


❌ Failed: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))
🎬 Flatliners
🚫 Poster not available.
❌ Failed: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))
🎬 Cypher
🚫 Poster not available.
❌ Failed: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))
🎬 Transformers: Revenge of the Fallen
🚫 Poster not available.
❌ Failed: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))
🎬 The Notebook
🚫 Poster not available.
❌ Failed: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))
🎬 Romance & Cigarettes
🚫 Poster not available.
🎬 Captain Phillips


❌ Failed: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))
🎬 Veer-Zaara
🚫 Poster not available.
❌ Failed: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))
🎬 Four Weddings and a Funeral
🚫 Poster not available.
❌ Failed: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))
🎬 Iron Man 2
🚫 Poster not available.
❌ Failed: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))
🎬 Iron Man 3
🚫 Poster not available.
❌ Failed: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))
🎬 Avengers: Age of Ultron
🚫 Poster not available.
❌ Failed: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))
🎬 Captain America: Civil War
🚫 Poster not available.
❌ Failed: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))
🎬 The Avengers
🚫 Poster not available.
