In [1]:
import numpy as np
import pandas as pd
import ast
import copy

In [2]:
from flask import Flask, request, jsonify
from threading import Thread
from flask_cors import CORS

# Flask application object; the @app.route handlers in later cells register on it.
app = Flask(__name__)
# Enable CORS on the app using flask_cors defaults (no origin restriction is configured here).
CORS(app)

<flask_cors.extension.CORS at 0x211d85c6960>

In [3]:
movies = pd.read_csv('./assets/tmdb_5000_movies.csv')
movies.head(1)

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2009-12-10,2787965087,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800


In [4]:
credits = pd.read_csv('./assets/tmdb_5000_credits.csv')
credits.head(1)

Unnamed: 0,movie_id,title,cast,crew
0,19995,Avatar,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."


In [5]:
# Merge the movie metadata with its credits on the movie id instead of the
# title. Titles are not guaranteed to be unique, and joining on a non-unique
# key pairs every same-titled movie with every same-titled credits row.
# `credits.movie_id` appears to be the same identifier as `movies.id` (both are
# 19995 for Avatar in the previews above).
# `title` is dropped from `credits` first so the merged frame keeps a single
# `title` column (no `title_x` / `title_y` suffixes).
movies = movies.merge(
    credits.drop(columns='title'),
    left_on='id',
    right_on='movie_id',
)
movies.head(1)

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,movie_id,cast,crew
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...",...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,19995,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."


In [6]:
# Keep only the columns used to build the recommendation tags:
#   id, genres, title, overview, keywords, cast, crew
# .copy() makes the selection an independent frame, so the in-place clean-up
# and column assignments in later cells do not operate on a slice of the merged
# frame (the situation pandas reports as SettingWithCopyWarning).
movies = movies[['id', 'genres', 'title', 'overview', 'keywords', 'cast', 'crew']].copy()

In [7]:
movies.isnull().sum()

id          0
genres      0
title       0
overview    3
keywords    0
cast        0
crew        0
dtype: int64

In [8]:
# Drop rows containing a missing value (only `overview` has any, per the count
# above) and renumber the index 0..n-1. Later cells mix label-based lookups
# (`movies.tags[0]`, `.index[0]`) with positional ones (rows of the similarity
# matrix); the two only agree when the index has no gaps.
movies = movies.dropna().reset_index(drop=True)
movies.isnull().sum()

id          0
genres      0
title       0
overview    0
keywords    0
cast        0
crew        0
dtype: int64

In [9]:
movies.duplicated().sum()

np.int64(0)

In [10]:
movies.iloc[0].genres

'[{"id": 28, "name": "Action"}, {"id": 12, "name": "Adventure"}, {"id": 14, "name": "Fantasy"}, {"id": 878, "name": "Science Fiction"}]'

In [11]:
def converter(obj):
    """Parse a stringified list of dicts and return each entry's 'name' value, in order."""
    return [entry['name'] for entry in ast.literal_eval(obj)]

movies.genres = movies['genres'].apply(converter)


In [12]:
movies.keywords = movies['keywords'].apply(converter)


In [13]:
def filter_movies(mvs, query):
    """Return the movies whose title contains ``query``, ignoring case.

    Parameters
    ----------
    mvs : iterable of mappings with a 'title' key, or a frame-like object
        exposing ``to_dict(orient='records')`` (e.g. a pandas DataFrame).
    query : str or None
        Substring to look for. ``None`` or ``''`` matches every movie.

    Returns
    -------
    list
        The matching movie records, in their original order.
    """
    # Iterating a DataFrame yields column labels, not rows, so turn
    # frame-like inputs into a list of row dicts first.
    if hasattr(mvs, 'to_dict'):
        mvs = mvs.to_dict(orient='records')
    needle = (query or '').lower()
    return [mv for mv in mvs if needle in str(mv['title']).lower()]


In [14]:
@app.route('/api/movies', methods=['GET'])
def get_movies():
    """Search movies by title.

    Query parameters
    ----------------
    title : substring to look for in movie titles (case-insensitive).

    Returns a JSON object ``{"movies": [{"id": ..., "title": ...}, ...]}``,
    or a 400 response with an ``error`` message when ``title`` is missing.
    """
    title = request.args.get('title')  # None when the query param is absent
    if title is None:
        # Without this guard `.lower()` on None raised AttributeError (HTTP 500).
        return jsonify({"error": "Missing 'title' parameter"}), 400
    needle = title.lower()
    # Column selection plus to_dict already yields fresh objects, so no deep copy is needed.
    records = movies[['id', 'title']].to_dict(orient="records")
    filtered_movies = [movie for movie in records if needle in movie['title'].lower()]
    return jsonify({"movies": filtered_movies})


In [15]:
def castConverter(obj, limit=3):
    """Return the 'name' of the first ``limit`` entries of a stringified list of dicts.

    Parameters
    ----------
    obj : str
        Python/JSON-style literal of a list of dicts, each with a 'name' key.
    limit : int or None, default 3
        Maximum number of names to return; ``None`` returns every name.

    Returns
    -------
    list
        Names in their original order, at most ``limit`` of them.
    """
    names = []
    for position, member in enumerate(ast.literal_eval(obj)):
        # Stop before touching entries beyond the limit.
        if limit is not None and position >= limit:
            break
        names.append(member['name'])
    return names

movies.cast = movies['cast'].apply(castConverter)

In [16]:
def findDirector(obj):
    """Return a one-element list with the name of the first crew entry whose job is 'Director'.

    ``obj`` is a stringified list of dicts with 'job' and 'name' keys.
    An empty list is returned when no entry has that job.
    """
    for member in ast.literal_eval(obj):
        if member['job'] != 'Director':
            continue
        # Only the first director is kept.
        return [member['name']]
    return []

movies.crew = movies['crew'].apply(findDirector)
movies.head(1)

Unnamed: 0,id,genres,title,overview,keywords,cast,crew
0,19995,"[Action, Adventure, Fantasy, Science Fiction]",Avatar,"In the 22nd century, a paraplegic Marine is di...","[culture clash, future, space war, space colon...","[Sam Worthington, Zoe Saldana, Sigourney Weaver]",[James Cameron]


In [17]:
movies.overview = movies['overview'].apply(lambda x: x.split())
movies.head(1)

Unnamed: 0,id,genres,title,overview,keywords,cast,crew
0,19995,"[Action, Adventure, Fantasy, Science Fiction]",Avatar,"[In, the, 22nd, century,, a, paraplegic, Marin...","[culture clash, future, space war, space colon...","[Sam Worthington, Zoe Saldana, Sigourney Weaver]",[James Cameron]


In [18]:
# Remove the spaces inside each list entry (e.g. "Science Fiction" -> "ScienceFiction")
# so that a multi-word genre / keyword / person name stays a single token.
for column in ('genres', 'overview', 'keywords', 'cast', 'crew'):
    movies[column] = movies[column].apply(lambda items: [item.replace(' ', '') for item in items])
movies.head(1)

Unnamed: 0,id,genres,title,overview,keywords,cast,crew
0,19995,"[Action, Adventure, Fantasy, ScienceFiction]",Avatar,"[In, the, 22nd, century,, a, paraplegic, Marin...","[cultureclash, future, spacewar, spacecolony, ...","[SamWorthington, ZoeSaldana, SigourneyWeaver]",[JamesCameron]


In [19]:
# Concatenate the per-movie token lists into one `tags` list.
movies['tags'] = movies['overview'] + movies['keywords'] + movies['genres'] + movies['cast'] + movies['crew']

# taking out only useful data
# .copy() gives an independent frame: the `tags` column is reassigned several
# times in the following cells, and assigning into a column subset of another
# frame is what pandas flags with SettingWithCopyWarning.
movies = movies[['id', 'title', 'tags']].copy()
movies.head(1)

Unnamed: 0,id,title,tags
0,19995,Avatar,"[In, the, 22nd, century,, a, paraplegic, Marin..."


In [20]:
movies.tags = movies['tags'].apply(lambda x: " ".join(x))
movies.tags[0]

'In the 22nd century, a paraplegic Marine is dispatched to the moon Pandora on a unique mission, but becomes torn between following orders and protecting an alien civilization. cultureclash future spacewar spacecolony society spacetravel futuristic romance space alien tribe alienplanet cgi marine soldier battle loveaffair antiwar powerrelations mindandsoul 3d Action Adventure Fantasy ScienceFiction SamWorthington ZoeSaldana SigourneyWeaver JamesCameron'

In [21]:
movies.tags = movies['tags'].apply(lambda x: x.lower())
movies.tags[0]

'in the 22nd century, a paraplegic marine is dispatched to the moon pandora on a unique mission, but becomes torn between following orders and protecting an alien civilization. cultureclash future spacewar spacecolony society spacetravel futuristic romance space alien tribe alienplanet cgi marine soldier battle loveaffair antiwar powerrelations mindandsoul 3d action adventure fantasy sciencefiction samworthington zoesaldana sigourneyweaver jamescameron'

### Bag of Words Technique

In [22]:
from sklearn.feature_extraction.text import CountVectorizer

In [23]:
cv = CountVectorizer(stop_words='english', max_features=5000)

In [24]:
# NOTE(review): this vectorises the un-stemmed tags, and `vectors` is reassigned
# by the same call after stemming further down, before `similarity` is computed.
# The result of this cell is therefore never used.
vectors = cv.fit_transform(movies['tags']).toarray()

In [25]:
import nltk

In [26]:
from nltk.stem.porter import PorterStemmer

In [27]:
stemmer = PorterStemmer()

In [28]:
def stemmerFunction(para):
    """Stem each whitespace-separated word of ``para`` and join the results with single spaces."""
    return " ".join(stemmer.stem(word) for word in para.split())


In [29]:
movies.tags = movies['tags'].apply(stemmerFunction)


In [30]:
vectors = cv.fit_transform(movies['tags']).toarray()

In [31]:
from sklearn.metrics.pairwise import cosine_similarity

In [32]:
similarity = cosine_similarity(vectors)

In [33]:
def recommend(movie, top_n=5):
    """Return the ``top_n`` movies most similar to the one titled ``movie``.

    Parameters
    ----------
    movie : str
        Exact title to look up in ``movies['title']``.
    top_n : int, default 5
        Number of recommendations to return.

    Returns
    -------
    list of dict
        ``{'id': int, 'title': str}`` records, most similar first.

    Raises
    ------
    IndexError
        If no movie has exactly this title.
    """
    # Row *positions* of the matching title(s). `similarity` is a plain array
    # addressed by position, so DataFrame index labels must not be used: they
    # differ from positions once rows have been dropped without an index reset.
    matches = np.flatnonzero((movies['title'] == movie).to_numpy())
    if matches.size == 0:
        raise IndexError(f"No movie titled {movie!r}")
    movie_pos = int(matches[0])

    distances = similarity[movie_pos]
    ranked = sorted(enumerate(distances), key=lambda pair: pair[1], reverse=True)
    # Exclude the query movie explicitly rather than assuming it sorts first
    # (ties, or an all-zero tag vector, can place another row ahead of it).
    neighbours = [pos for pos, _ in ranked if pos != movie_pos][:top_n]

    return [
        {'id': int(movies['id'].iloc[pos]), 'title': movies['title'].iloc[pos]}
        for pos in neighbours
    ]

In [34]:
@app.route('/api/recommend', methods=['GET'])
def recommender():
    """Return recommendations for the movie given by the 'title' query parameter.

    Responses
    ---------
    200 : JSON list produced by ``recommend(title)``.
    400 : ``title`` query parameter is missing.
    404 : no movie has exactly this title.
    """
    movie = request.args.get('title')
    if movie is None:
        return jsonify({"error": "Missing 'title' parameter"}), 400
    try:
        data = recommend(movie)
    except IndexError:
        # recommend() raises IndexError when no row matches the title;
        # report that as "not found" instead of an unhandled 500.
        return jsonify({"error": f"Movie '{movie}' not found"}), 404
    return jsonify(data)

In [None]:
def run_flask():
    """Start the Flask server on all interfaces, port 5000, with debug mode and the reloader off."""
    server_options = dict(host='0.0.0.0', port=5000, debug=False, use_reloader=False)
    app.run(**server_options)

# Serve from a separate thread; calling app.run() directly would block this cell.
flask_thread = Thread(target=run_flask)
flask_thread.start()

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://10.30.20.64:5000
Press CTRL+C to quit
127.0.0.1 - - [06/Mar/2025 15:55:51] "GET /api/movies?title=hel HTTP/1.1" 200 -
127.0.0.1 - - [06/Mar/2025 15:56:24] "GET /api/movies?title=hel HTTP/1.1" 200 -
127.0.0.1 - - [06/Mar/2025 15:56:40] "GET /api/recommend?title=Hellboy HTTP/1.1" 200 -
