<a href="https://colab.research.google.com/github/abiralchy0987/movie_recommendation_system/blob/main/Streamlit.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install streamlit pyngrok
!pip install scikit-surprise
!pip install thefuzz[speedup]


Collecting streamlit
  Downloading streamlit-1.42.2-py2.py3-none-any.whl.metadata (8.9 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.42.2-py2.py3-none-any.whl (9.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.6/9.6 MB[0m [31m33.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.2.3-py3-none-any.whl (23 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m37.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64

In [None]:
%%writefile app.py
import streamlit as st
# Configure the page before any other Streamlit command runs; the Streamlit
# release installed above documents set_page_config as having to be the first
# st.* call in the script, which is why it sits ahead of the other imports.
st.set_page_config(page_title="Movie Recommender", layout="wide")

import streamlit as st
import pandas as pd
import numpy as np
import ast
import nltk
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from surprise import SVD, Dataset, Reader
from surprise.model_selection import train_test_split
from thefuzz import fuzz, process

# Fetch the NLTK stopword corpus. Streamlit re-executes this script on every
# interaction, so keep the downloader quiet instead of logging on each rerun.
# NOTE(review): nothing in this file visibly reads the corpus (CountVectorizer
# is given stop_words='english' directly) — confirm whether it is still needed.
nltk.download('stopwords', quiet=True)

# NOTE: the page config is set at the top of this file, right after the first streamlit import.

# Cache data loading and preprocessing
# NOTE(review): st.cache_data serialises the return value; for a large dense
# similarity matrix st.cache_resource may be cheaper — measure before changing.
@st.cache_data
def load_and_preprocess_data():
    """Build the content-based model inputs from the TMDB 5000 CSV files.

    Reads ``tmdb_5000_movies.csv`` and ``tmdb_5000_credits.csv`` from the
    working directory, joins them on ``title`` and derives one stemmed,
    lower-cased ``tags`` string per movie from its overview, keywords, genres,
    first three cast members and director. The tags are vectorised with a
    bag-of-words model (at most 5000 features, English stop words removed) and
    compared pairwise with cosine similarity.

    Returns:
        tuple: ``(movies, similarity)`` where ``movies`` is the processed
        DataFrame and ``similarity`` is a square matrix whose row/column ``i``
        corresponds to ``movies.iloc[i]`` (and, because the index is reset,
        also to index label ``i``).
    """

    def convert(text):
        """Return the ``name`` of every entry in a stringified list of dicts."""
        return [entry['name'] for entry in ast.literal_eval(text)]

    def convert_cast(text):
        """Return the names of the first three listed cast members."""
        return [entry['name'] for entry in ast.literal_eval(text)[:3]]

    def fetch_director(text):
        """Return a list holding the first crew member whose job is 'Director'."""
        return [entry['name'] for entry in ast.literal_eval(text) if entry['job'] == 'Director'][:1]

    movies = pd.read_csv('tmdb_5000_movies.csv')
    credits = pd.read_csv('tmdb_5000_credits.csv')
    movies = movies.merge(credits, on='title')

    # Dropping incomplete rows leaves gaps in the index. The similarity matrix
    # built below is purely positional, so renumber the rows to keep index
    # labels and matrix positions identical for callers that look rows up by
    # label.
    movies = (
        movies[['movie_id', 'title', 'overview', 'genres', 'keywords', 'cast', 'crew']]
        .dropna()
        .reset_index(drop=True)
    )

    # Turn every feature column into a list of tokens.
    movies['genres'] = movies['genres'].apply(convert)
    movies['keywords'] = movies['keywords'].apply(convert)
    movies['cast'] = movies['cast'].apply(convert_cast)
    movies['crew'] = movies['crew'].apply(fetch_director)
    movies['overview'] = movies['overview'].apply(lambda x: x.split())

    # Concatenate the token lists and flatten them into one lower-cased string.
    movies['tags'] = movies['overview'] + movies['keywords'] + movies['genres'] + movies['cast'] + movies['crew']
    movies['tags'] = movies['tags'].apply(lambda x: ' '.join(x).lower())

    # Stem each word so inflected forms share one vocabulary entry.
    ps = PorterStemmer()
    movies['tags'] = movies['tags'].apply(lambda x: ' '.join([ps.stem(word) for word in x.split()]))

    cv = CountVectorizer(max_features=5000, stop_words='english')
    vectors = cv.fit_transform(movies['tags']).toarray()
    similarity = cosine_similarity(vectors)

    return movies, similarity

@st.cache_resource
def train_collaborative_model():
    """Train the collaborative-filtering model on the ratings data.

    Reads ``ratings.csv`` and ``movies.csv`` from the working directory, joins
    them on ``movieId`` and fits a Surprise ``SVD`` model on every
    ``(userId, movieId, rating)`` row of the joined frame.

    Returns:
        tuple: ``(svd, movies_merged)`` — the fitted model and the joined
        ratings/movies DataFrame.
    """
    ratings = pd.read_csv('ratings.csv')
    movie_titles = pd.read_csv('movies.csv')
    movies_merged = pd.merge(ratings, movie_titles, on='movieId')

    # Take the rating bounds from the data instead of assuming 1-5: Surprise
    # clips its predictions to this range, so a wrong lower bound (e.g. for
    # half-star ratings) would distort the estimates.
    rating_bounds = (
        float(movies_merged['rating'].min()),
        float(movies_merged['rating'].max()),
    )
    reader = Reader(rating_scale=rating_bounds)
    data = Dataset.load_from_df(movies_merged[['userId', 'movieId', 'rating']], reader)
    trainset = data.build_full_trainset()

    # Fixed seed so retraining after a cache miss yields the same model.
    svd = SVD(random_state=42)
    svd.fit(trainset)

    return svd, movies_merged

# Build (or fetch from Streamlit's cache) the content-based data and the
# collaborative model; the recommendation functions below read these
# module-level names directly.
movies_df, similarity = load_and_preprocess_data()
svd_model, merged_data = train_collaborative_model()

def find_closest_movie(title_input):
    """Fuzzy-match user input against the known movie titles.

    Args:
        title_input: Free-text title typed by the user.

    Returns:
        The title from ``movies_df`` (in its original casing) whose lower-cased
        form best matches the input under ``fuzz.partial_ratio``, or ``None``
        when the input is blank, there are no titles to match against, or the
        best score is 60 or lower.
    """
    query = (title_input or "").strip().lower()
    if not query:
        return None

    all_titles = movies_df['title'].tolist()
    # Lower-case once and reuse the list for both matching and the reverse lookup.
    lowered_titles = [title.lower() for title in all_titles]

    match = process.extractOne(query, lowered_titles, scorer=fuzz.partial_ratio)
    if match is None:
        # extractOne yields None when it has nothing to choose from.
        return None

    best_match, score = match[0], match[1]
    if score <= 60:
        return None
    return all_titles[lowered_titles.index(best_match)]

def hybrid_recommend(user_id, movie_title, top_n=10):
    """Recommend movies by combining content similarity with SVD predictions.

    Args:
        user_id: Raw user id as it appears in the ratings data.
        movie_title: Free-text seed title; resolved with ``find_closest_movie``.
        top_n: Maximum number of titles to return.

    Returns:
        list: Up to ``top_n`` unique titles — the content-based picks first
        (most similar first), followed by the collaborative picks (highest
        predicted rating first). Empty when the seed title cannot be matched.
    """
    n_content, n_collab = 5, 10

    matched_movie = find_closest_movie(movie_title)
    if not matched_movie:
        return []

    # --- Content-based part -------------------------------------------------
    # The similarity matrix is positional, so locate the seed movie by row
    # position rather than by index label (the two differ whenever the frame's
    # index has gaps).
    titles = movies_df['title'].to_numpy()
    movie_pos = int(np.flatnonzero(titles == matched_movie)[0])

    # Stable descending sort: ties keep their original row order.
    ranked_positions = np.argsort(-similarity[movie_pos], kind='stable')
    content_recs = []
    for pos in ranked_positions:
        # Never recommend the seed movie (or a duplicate row of it) back.
        if pos == movie_pos or titles[pos] == matched_movie:
            continue
        content_recs.append(titles[pos])
        if len(content_recs) == n_content:
            break

    # --- Collaborative part -------------------------------------------------
    user_rated = set(merged_data.loc[merged_data['userId'] == user_id, 'movieId'])
    # One title per movieId, built once instead of scanning the frame per movie.
    title_by_movie_id = merged_data.drop_duplicates('movieId').set_index('movieId')['title']
    unrated = [mid for mid in title_by_movie_id.index if mid not in user_rated]
    # Rank the unseen movies by the rating the SVD model predicts for this
    # user. This scores every unrated movie, which is fine for datasets of a
    # few thousand titles.
    unrated.sort(key=lambda mid: svd_model.predict(user_id, mid).est, reverse=True)
    collab_recs = [title_by_movie_id.loc[mid] for mid in unrated[:n_collab]]

    # Order-preserving de-duplication keeps the ranking deterministic.
    return list(dict.fromkeys(content_recs + collab_recs))[:top_n]

# ---- Page layout: header, inputs, results, dataset summary ----
st.title("🎬 Hybrid Movie Recommender System")

# Two side-by-side inputs: the user id on the left, the seed title on the right.
id_column, title_column = st.columns(2)
with id_column:
    user_id = st.number_input("Enter User ID", min_value=1, value=1, step=1)

with title_column:
    movie_title = st.text_input("Enter a Movie Title", "The Dark Knight")

if st.button("Get Recommendations"):
    if not (user_id and movie_title):
        st.error("Please fill in both fields")
    else:
        with st.spinner('Finding best recommendations...'):
            recommendations = hybrid_recommend(user_id, movie_title)

        if not recommendations:
            st.warning("No recommendations found. Please try a different movie title.")
        else:
            st.subheader("Recommended Movies:")
            # Render the results as a numbered list starting at 1.
            for rank, recommended_title in enumerate(recommendations, start=1):
                st.markdown(f"{rank}. {recommended_title}")

# Footer: shapes of the two datasets backing the recommender.
st.markdown("---")
st.write("### Dataset Information")
for caption, frame in (
    ("Content-based data shape:", movies_df),
    ("Collaborative data shape:", merged_data),
):
    st.write(caption, frame.shape)


Writing app.py


In [None]:
# Launch the Streamlit app as a background shell job; its stdout and stderr
# are redirected to log.txt, so the cell returns immediately.
!streamlit run app.py &> log.txt &


In [None]:
import os
import signal
from getpass import getpass

from pyngrok import ngrok

# Kill ngrok processes left over from earlier runs so a fresh tunnel can be
# opened. A process may exit between the `ps` listing and the kill, so a
# missing pid is not an error.
with os.popen("ps ax | grep ngrok | grep -v grep") as ps_output:
    for line in ps_output:
        fields = line.split()
        if not fields:
            continue
        try:
            os.kill(int(fields[0]), signal.SIGKILL)
        except (ProcessLookupError, ValueError):
            continue

# SECURITY: never hard-code the ngrok authtoken in the notebook. A token was
# previously committed in this cell; treat it as compromised and revoke it in
# the ngrok dashboard. The token is now read from the NGROK_AUTHTOKEN
# environment variable, falling back to an interactive prompt that does not
# echo the value into the notebook.
authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(authtoken)

# Open an HTTPS tunnel to port 8501, Streamlit's default port.
tunnel = ngrok.connect(8501, proto="http", bind_tls=True)

public_url = tunnel.public_url

print(f"Streamlit App is live at: {public_url}")

Streamlit App is live at: https://b1c5-34-86-210-137.ngrok-free.app


In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): app.py reads its CSV files via relative paths; if those files
# live on Drive, this cell has to run (and the files be made reachable from
# the working directory) before the app is started — confirm.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
