In [2]:
import pandas as pd
from fuzzywuzzy import fuzz
from surprise import SVD, Reader, Dataset
from pathlib import Path
from flask import Flask, request
import json
from pymongo import MongoClient

def db_connection():
    uri = "mongodb+srv://movie:Trucquynh@cluster0.h1patwk.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"
    client = MongoClient(uri)
    return client

client = db_connection()
db = client["test"] 
collection = db["films"] 
collectionRating = db["rating"]
documents = collection.find()
film_data = pd.DataFrame(documents)
rating = pd.DataFrame(collectionRating.find())
film_data = film_data[["id", "title", "overview", "vote_average", "genre_names"]]

df_exploded = rating.explode("Rating")

ratingUser = pd.concat(
    [df_exploded.drop(columns=["Rating"]), df_exploded["Rating"].apply(pd.Series)],
    axis=1,
)
ratingUser=ratingUser.drop(columns=["_id"])
ratingUser.rename(columns={"FilmId":"id"}, inplace = True)
collectedData =  pd.merge(film_data, ratingUser[['UserId', 'Score', 'id']], on='id', how='right')

# Create a mapping for FilmId to Title and Genre
film_id_to_title = dict(zip(film_data['id'], film_data['title']))
film_id_to_genre = dict(zip(film_data['id'], film_data['genre_names']))


# Train the SVD model
def train_svd_model():
    """Train the SVD model on user ratings."""
    reader = Reader(rating_scale=(1, 10))
    dataset = Dataset.load_from_df(collectedData[['UserId', 'id', 'Score']], reader)
    trainset = dataset.build_full_trainset()
    algo = SVD()
    algo.fit(trainset)
    print("Model training completed!")
    return algo

# Get popular unwatched films
def get_popular_unwatched_films(user_id, top_n=50):
    """Retrieve popular films not yet rated by the user."""
    watched_films = collectedData[collectedData['UserId'] == user_id]['id'].unique()
    popular_films = film_data[~film_data['id'].isin(watched_films)]['id'].tolist()
    return popular_films[:top_n]


app = Flask(__name__)

# Recommendation system functions
@app.route("/predictSVD", methods = ["GET"])
def recommend_films():
    """Recommend films for a user based on SVD predictions."""
    conn = db_connection()
    cursor = conn.cursor()

    user_id = request.json.get("UserId")
    film_ids =  get_popular_unwatched_films(user_id)[:50]
    algo = train_svd_model()
    predictions = []
    for film_id in film_ids:
        pred = algo.predict(user_id, film_id)
        predictions.append((film_id, pred.est))
    predictions.sort(key=lambda x: x[1], reverse=True)
    return predictions[:10]

@app.route("/similarFilms", methods = ["GET"])
def get_similar_films():
    conn = db_connection()
    cursor = conn.cursor()

    film_id = request.json.get("id")

    """Find similar films based on title and genre similarity."""
    selected_film_title = film_id_to_title.get(film_id, "Unknown Title")
    selected_film_genre = film_id_to_genre.get(film_id, "Unknown Genre")

    similar_films = []
    for other_id, other_title in film_id_to_title.items():
        if other_id == film_id:
            continue
        # Calculate similarity
        title_similarity = fuzz.ratio(selected_film_title, other_title)
        genre_similarity = fuzz.token_sort_ratio(selected_film_genre, film_id_to_genre.get(other_id, ""))
        total_similarity = title_similarity + genre_similarity
        similar_films.append((other_id, other_title, total_similarity))
    
    # Sort and return the top 12 most similar films
    similar_films.sort(key=lambda x: x[2], reverse=True)
    return similar_films[:12]


# Example usage
if __name__ == "__main__":
    app.run(debug=True, port=5002)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on


 * Restarting with watchdog (windowsapi)


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
