In [1]:
import os
import io
import requests
import numpy as np
import pandas as pd
import onnxruntime as ort
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.manifold import TSNE
import plotly.express as px
import ipywidgets as widgets
from IPython.display import display

In [2]:
# ----------------------
# Load Data (movies, ratings, embeddings, genres)
# ----------------------
def load_movies(model_name):
    """Read the movie table stored next to the given model.

    Args:
        model_name: Name of the sub-directory under ``models/`` to read from.

    Returns:
        pandas.DataFrame parsed from ``models/<model_name>/movies.csv``.
    """
    return pd.read_csv(os.path.join("models", model_name, "movies.csv"))
        
def load_ratings(model_name):
    """Download the ratings table and return it as a DataFrame.

    The ratings are fetched from a fixed Google Drive URL and parsed as a
    Parquet file held in memory.

    Args:
        model_name: Accepted for symmetry with the other loaders; the download
            URL is fixed, so this value is currently not used.

    Returns:
        pandas.DataFrame read from the downloaded Parquet payload.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within the timeout.
    """
    ratings_url = "https://drive.google.com/uc?id=1YAAL02PJ82kBEiMO87okSpHpUiCzIKe3"
    # Without a timeout a stalled connection would block the kernel forever.
    response = requests.get(ratings_url, timeout=60)
    response.raise_for_status()
    return pd.read_parquet(io.BytesIO(response.content))

def load_embeddings(model_name):
    """Load the saved embedding array for the given model.

    Args:
        model_name: Name of the sub-directory under ``models/`` to read from.

    Returns:
        The array stored in ``models/<model_name>/embeddings.npy``.
    """
    embeddings_file = os.path.join("models", model_name, "embeddings.npy")
    return np.load(embeddings_file)
        
def load_genres():
    """Return the fixed list of genre names offered by the recommender.

    Returns:
        A new list of genre name strings on every call.
    """
    genre_names = (
        'Action', 'Adventure', 'Animation', 'Children', 'Comedy',
        'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir',
        'Horror', 'IMAX', 'Musical', 'Mystery', 'Romance',
        'Sci-Fi', 'Thriller', 'War', 'Western',
    )
    return list(genre_names)

# ----------------------
# Load ONNX Model
# ----------------------
def load_model(model_name):
    """Open an ONNX Runtime inference session for the given model.

    Args:
        model_name: Name of the sub-directory under ``models/`` to read from.

    Returns:
        An ``ort.InferenceSession`` created from
        ``models/<model_name>/model-q.onnx``.
    """
    onnx_file = os.path.join("models", model_name, "model-q.onnx")
    session = ort.InferenceSession(onnx_file)
    return session

# ----------------------
# Predict Function
# ----------------------
def recommend_similar_movies_name(movie_name, movies, movie_encoder, embeddings, top_n):
    """Recommend the movies whose embeddings are closest to a named movie.

    The query movie is the first row of ``movies`` whose title contains
    ``movie_name`` (case-insensitive, literal substring) and whose id is known
    to ``movie_encoder``. All other embeddings are ranked by cosine similarity
    to the query movie's embedding.

    Args:
        movie_name: Free-text fragment of a movie title.
        movies: DataFrame with at least ``movieId`` and ``title`` columns.
        movie_encoder: Fitted label encoder (``classes_``, ``transform``,
            ``inverse_transform``) translating movie ids to row indices of
            ``embeddings``.
        embeddings: 2-D array of movie embeddings.
            NOTE(review): assumes row ``i`` belongs to encoded movie ``i`` --
            confirm against the training code.
        top_n: Maximum number of recommendations to return.

    Returns:
        DataFrame indexed from 1 with columns ``Movie ID``, ``Title``,
        ``Year`` and ``Cosine Similarity`` (rounded to three decimals), ordered
        from most to least similar; ``None`` when the query is blank or no
        usable title matches.
    """
    movie_name = movie_name.strip()
    if not movie_name:
        # An empty fragment would match every title and pick an arbitrary movie.
        return None

    title_hits = movies['title'].str.contains(movie_name, case=False, regex=False, na=False)
    matching_movies = movies[title_hits]
    # Only movies the encoder has seen own an embedding row; skipping the
    # others avoids a ValueError from transform() on unseen ids.
    matching_movies = matching_movies[matching_movies['movieId'].isin(movie_encoder.classes_)]
    if matching_movies.empty:
        return None
    movie_id = matching_movies['movieId'].iloc[0]

    movie_encoded = movie_encoder.transform([movie_id])[0]
    target_embedding = embeddings[movie_encoded].reshape(1, -1)
    similarities = cosine_similarity(target_embedding, embeddings).flatten()

    # Rank best-first and drop the query movie explicitly instead of assuming
    # it always sorts last (ties or duplicate embeddings can break that).
    ranked = np.argsort(-similarities, kind="stable")
    similar_indices = ranked[ranked != movie_encoded][:max(top_n, 0)]

    # Pair every id with its own score *before* joining titles, so the scores
    # cannot be attached to the wrong rows; an inner merge keeps the left
    # (similarity) order.
    scored = pd.DataFrame({
        'movieId': movie_encoder.inverse_transform(similar_indices),
        'similarity': np.round(similarities[similar_indices], 3),
    })
    recommendations = scored.merge(movies[['movieId', 'title']], on='movieId', how='inner')

    recommendations = recommendations.rename(columns={
        "movieId": "Movie ID",
        "title": "Title",
        "similarity": "Cosine Similarity"
    })
    # Titles look like "Name (1999)": move the year into its own column.
    recommendations["Year"] = recommendations["Title"].str.extract(r"\((\d{4})\)")
    recommendations["Title"] = recommendations["Title"].str.replace(r" \(\d{4}\)", "", regex=True)
    recommendations = recommendations[["Movie ID", "Title", "Year", "Cosine Similarity"]]

    # Present ranks starting at 1.
    recommendations = recommendations.reset_index(drop=True)
    recommendations.index += 1

    return recommendations


def recommend_by_genre_pop(genres, movies, ratings, top_n):
    """Rank the movies carrying every requested genre by number of ratings.

    Args:
        genres: Iterable of genre column names; a movie qualifies only if each
            of these columns equals 1 for it.
        movies: DataFrame with ``movieId``, ``title`` and one column per genre.
        ratings: DataFrame with a ``movieId`` column, one row per rating.
        top_n: Maximum number of movies to return.

    Returns:
        DataFrame indexed from 1 with columns ``Movie ID``, ``Title``,
        ``Year`` and ``Total Rating``, most-rated first; ``None`` when no movie
        has all requested genres.
    """
    # Narrow the catalogue one genre at a time.
    candidates = movies
    for genre_col in genres:
        candidates = candidates[candidates[genre_col] == 1]
    if candidates.empty:
        return None

    # Popularity is simply the number of rating rows per movie; movies that
    # were never rated get a count of 0.
    rating_counts = ratings.groupby('movieId').size().reset_index(name='rating_count')
    ranked = (
        candidates
        .merge(rating_counts, on='movieId', how='left')
        .fillna({'rating_count': 0})
        .sort_values(by='rating_count', ascending=False)
    )

    top = ranked[['movieId', 'title', 'rating_count']].head(top_n)
    top = top.astype({'rating_count': int})
    top = top.reset_index(drop=True)
    top.index += 1  # ranks start at 1

    top = top.rename(columns={
        "movieId": "Movie ID",
        "title": "Title",
        "rating_count": "Total Rating"
    })
    # Titles look like "Name (1999)": move the year into its own column.
    top["Year"] = top["Title"].str.extract(r"\((\d{4})\)")
    top["Title"] = top["Title"].str.replace(r" \(\d{4}\)", "", regex=True)

    return top[["Movie ID", "Title", "Year", "Total Rating"]]


def recommend_combined_mix(movie_name, genres, movies, movie_encoder, embeddings, ratings, top_n=10):
    """Recommend movies similar to a named movie that also match given genres.

    A pool of ``top_n * 10`` embedding neighbours is fetched through
    ``recommend_similar_movies_name``, narrowed to the movies carrying every
    requested genre, and ordered by similarity, then by number of ratings.

    Args:
        movie_name: Free-text fragment of a movie title.
        genres: Iterable of genre column names; a movie qualifies only if each
            of these columns equals 1 for it.
        movies: DataFrame with ``movieId``, ``title`` and one column per genre.
        movie_encoder: Encoder passed through to
            ``recommend_similar_movies_name``.
        embeddings: Embedding array passed through to
            ``recommend_similar_movies_name``.
        ratings: DataFrame with a ``movieId`` column, one row per rating.
        top_n: Maximum number of recommendations to return.

    Returns:
        DataFrame indexed from 1 with columns ``Movie ID``, ``Title``,
        ``Year``, ``Cosine Similarity`` and ``Total Rating``; ``None`` when the
        movie is not found or no neighbour has all requested genres.
    """
    # Over-fetch neighbours so that enough of them survive the genre filter.
    similar_movies = recommend_similar_movies_name(movie_name, movies, movie_encoder, embeddings, top_n=top_n*10)
    # The similarity lookup returns None (not an empty frame) when no title
    # matches, so both cases must be checked.
    if similar_movies is None or similar_movies.empty:
        return None

    similar_movie_ids = similar_movies["Movie ID"].astype(int).values

    filtered_movies = movies[movies['movieId'].isin(similar_movie_ids)]
    for genre in genres:
        filtered_movies = filtered_movies[filtered_movies[genre] == 1]
    if filtered_movies.empty:
        return None

    # Popularity is the number of rating rows per movie; unrated movies get 0.
    popularity = ratings.groupby('movieId').size().reset_index(name='rating_count')
    filtered_movies = filtered_movies.merge(popularity, on='movieId', how='left').fillna({'rating_count': 0})

    # Attach each candidate's similarity score by movie id.
    filtered_movies = filtered_movies.merge(similar_movies[['Movie ID', 'Cosine Similarity']],
                                            left_on='movieId', right_on='Movie ID', how='left')

    filtered_movies = filtered_movies.sort_values(by=['Cosine Similarity', 'rating_count'], ascending=[False, False])

    # rename()/astype() return new frames, so nothing below mutates a slice of
    # ``filtered_movies``. Counts are cast to int to match the genre-only view.
    recommendations = (
        filtered_movies[['movieId', 'title', 'Cosine Similarity', 'rating_count']]
        .head(top_n)
        .rename(columns={
            'movieId': 'Movie ID',
            'title': 'Title',
            'rating_count': 'Total Rating'
        })
        .astype({'Total Rating': int})
    )
    # Titles look like "Name (1999)": move the year into its own column.
    recommendations["Year"] = recommendations["Title"].str.extract(r"\((\d{4})\)")
    recommendations["Title"] = recommendations["Title"].str.replace(r" \(\d{4}\)", "", regex=True)
    recommendations = recommendations[["Movie ID", "Title", "Year", "Cosine Similarity", "Total Rating"]]
    recommendations = recommendations.reset_index(drop=True)
    recommendations.index += 1  # ranks start at 1
    return recommendations

In [12]:
# ----------------------
# User Interface
# ----------------------
def create_mov_recsys_interface(model_name="ncf"):
    """Build and display the interactive movie recommender widget.

    Loads the movie table, ratings and embeddings for ``model_name``, fits a
    label encoder on the rated movie ids, and shows a form with a movie text
    box, a genre multi-select, a result-count slider, a button and a read-only
    output area. Clicking the button dispatches to the name-based,
    genre-based or combined recommender depending on which inputs are filled.

    Args:
        model_name: Name of the sub-directory under ``models/`` holding the
            model artifacts.

    Returns:
        None. The widget is rendered with ``display``.
    """
    movies = load_movies(model_name)
    ratings = load_ratings(model_name)
    embeddings = load_embeddings(model_name)
    all_genres = load_genres()

    # Maps movie ids to contiguous indices.
    # NOTE(review): assumes these indices line up with the rows of
    # ``embeddings`` -- confirm against the training code.
    movie_encoder = LabelEncoder()
    movie_encoder.fit(ratings['movieId'])

    title = widgets.Label(value="Movie Recommender System")
    text_input = widgets.Textarea(description="Movie:", placeholder="e.g. Toy Story.")
    multi_select = widgets.SelectMultiple(
        options=all_genres,
        description='Genres:',
        disabled=False
    )
    int_slider = widgets.IntSlider(
        value=10,
        min=0,
        max=50,
        step=1,
        description='Number of Recommendations:',
        orientation='horizontal',
        readout=True,
        readout_format='d'
    )
    output_area = widgets.Textarea(value="Result:", layout=widgets.Layout(height='250px', width='1000px'), disabled=True)
    tag_button = widgets.Button(description="Recommend")

    def on_infer_clicked(b):
        """Run the matching recommender and write its result to the output area."""
        # Strip so that a whitespace-only box counts as "no movie given".
        movie_name = text_input.value.strip()
        selected_genres = multi_select.value
        top_n = int_slider.value

        if not movie_name and not selected_genres:
            output_area.value = "Please enter either a preferred movie in the input box or select some genres."
            return

        if movie_name and selected_genres:
            recommendations = recommend_combined_mix(movie_name, selected_genres, movies, movie_encoder, embeddings, ratings, top_n)
        elif movie_name:
            recommendations = recommend_similar_movies_name(movie_name, movies, movie_encoder, embeddings, top_n)
        else:
            recommendations = recommend_by_genre_pop(selected_genres, movies, ratings, top_n)

        # The recommenders return None when nothing matches; calling
        # .to_string() on that would raise inside the click handler.
        if recommendations is None or recommendations.empty:
            output_area.value = "Result:\nNo recommendations found for the given movie and/or genres."
            return

        output_area.value = f"Result:\n{recommendations.to_string(index=False)}"

    tag_button.on_click(on_infer_clicked)

    display(widgets.VBox([title, text_input, multi_select, int_slider, tag_button, output_area]))

In [13]:
# Build and display the recommender UI using the artifacts of the "ncf" model.
create_mov_recsys_interface("ncf")

VBox(children=(Label(value='Movie Recommender System'), Textarea(value='', description='Movie:', placeholder='…