<a href="https://colab.research.google.com/github/udaydaroch/Algorithm-Visualizer/blob/main/MovieRecommendationModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install dask pandas scikit-learn nltk faker surprise

Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Collecting scikit-surprise (from surprise)
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp310-cp310-linux_x86_64.whl size=2357243 sha256=44af6a62d71e996ccecc0a89077b508658886ff67280073e8a7acf3346c200e1
  Stored in directory: /root/.cache/pip/wheels/4b/3f/df/6acbf0a40397d9bf3ff97f582cc22fb9ce66adde75bc71fd54
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully inst

In [None]:
import json
import random
from faker import Faker
import dask.dataframe as dd
import pandas as pd
from surprise import Dataset, Reader, SVD, accuracy
from surprise.model_selection import train_test_split
from tabulate import tabulate
import random


# Seed both random sources so a fresh "Restart & Run All" regenerates the same
# synthetic dataset. Values Faker derives from the current date (for example
# date_time_this_decade) can still differ between runs on different days.
SEED = 42
random.seed(SEED)
Faker.seed(SEED)

# Shared Faker instance used by the generators below for names, titles and text.
fake = Faker()

# Closed set of genre labels; every generated movie gets exactly one of them.
genres = ["Action", "Comedy", "Drama", "Fantasy", "Horror", "Romance", "Sci-Fi", "Thriller"]

# Build synthetic movie metadata records
def generate_movies(num_movies):
    """Return a list of ``num_movies`` fake movie dictionaries.

    Movie ids run from 0 to ``num_movies - 1``. Every other field is drawn
    from Faker or from the ``random`` module:

    * title        - a short Faker sentence (``nb_words=3``) with periods removed
    * genres       - a single entry picked from the module-level ``genres`` list
    * director     - one Faker name
    * actors       - a list of three Faker names
    * release_year - integer in 1950..2023 (inclusive)
    * plot_summary - Faker text of at most 200 characters
    * duration     - integer in 60..180 (inclusive)
    * language / country - Faker language name and country
    * average_rating     - uniform draw from 1..10, rounded to one decimal
    """

    def _fake_movie(movie_id):
        # Dict entries are evaluated top to bottom, so the sequence of random
        # draws per movie stays fixed.
        return {
            "movie_id": movie_id,
            "title": fake.sentence(nb_words=3).replace(".", ""),
            "genres": random.choice(genres),
            "director": fake.name(),
            "actors": [fake.name() for _ in range(3)],
            "release_year": random.randint(1950, 2023),
            "plot_summary": fake.text(max_nb_chars=200),
            "duration": random.randint(60, 180),
            "language": fake.language_name(),
            "country": fake.country(),
            "average_rating": round(random.uniform(1, 10), 1),
        }

    return [_fake_movie(movie_id) for movie_id in range(num_movies)]

# Generate random user interaction data
def generate_ratings(num_users, num_movies):
    """Return a list of fake rating dictionaries.

    For each user id in ``range(num_users)`` a random number of distinct
    movies (between 5 and 20, but never more than the catalogue contains) is
    drawn from ``range(num_movies)``. Each drawn movie yields one record:

    * user_id   - the user's index
    * movie_id  - the sampled movie index
    * rating    - integer in 1..5 (inclusive)
    * timestamp - POSIX timestamp of a Faker date-time from the current decade

    An empty catalogue (``num_movies <= 0``) or ``num_users == 0`` yields an
    empty list.
    """
    ratings = []
    catalogue = range(num_movies)
    for user_id in range(num_users):
        # random.sample raises ValueError when asked for more items than the
        # population holds, so cap the 5-20 draw at the catalogue size.
        sample_size = min(random.randint(5, 20), len(catalogue))
        for movie_id in random.sample(catalogue, sample_size):
            ratings.append({
                "user_id": user_id,
                "movie_id": movie_id,
                "rating": random.randint(1, 5),
                "timestamp": fake.date_time_this_decade().timestamp()
            })
    return ratings

# Persist records in JSON Lines format
def save_to_jsonl(data, filename):
    """Write every item of ``data`` to ``filename`` as one JSON document per line.

    The file is opened in text write mode, so any existing content is
    replaced. Each line is the ``json.dumps`` encoding of one item followed
    by a newline.
    """
    with open(filename, 'w') as out_file:
        out_file.writelines(f"{json.dumps(record)}\n" for record in data)

# Dataset size: a large movie catalogue rated by a small number of users.
num_movies, num_users = 100_000, 100

# Build both synthetic tables in memory first ...
movies = generate_movies(num_movies)
ratings = generate_ratings(num_users, num_movies)

# ... then write each one out as a JSON Lines file for the loading step.
save_to_jsonl(movies, "movies.jsonl")
save_to_jsonl(ratings, "ratings.jsonl")

print("Data generated and saved to JSONL files.")



Data generated and saved to JSONL files.


In [None]:
# Read both JSON Lines files as Dask dataframes
ratings_dd = dd.read_json('ratings.jsonl', lines=True)
movies_dd = dd.read_json('movies.jsonl', lines=True)

# Attach the movie metadata to every rating via the shared movie_id column
merged_dd = ratings_dd.merge(movies_dd, on='movie_id')

print("Data loaded and joined.")

# Materialise the join as a pandas DataFrame for interactive work
merged_df = merged_dd.compute()

print(merged_df.head())

Data loaded and joined.
   user_id  movie_id  rating                     timestamp  \
0        0     49046       5 2020-01-26 15:09:07.611001088   
1        0     10787       3 2021-07-09 21:02:14.913149952   
2        0     61812       1 2021-07-11 21:48:22.020065024   
3        0      3390       4 2021-01-19 07:40:16.825721856   
4        0     80944       1 2022-03-02 18:05:48.317865984   

                      title    genres         director  \
0  Industry yourself parent    Sci-Fi    Dale Anderson   
1               Author card    Sci-Fi    Dale Espinoza   
2    Sister including thing     Drama    Tamara Walton   
3         Four economy need  Thriller  Danielle Nelson   
4              Position new   Romance  Courtney Morgan   

                                              actors  release_year  \
0  ['Gregory Payne', 'James Schwartz', 'Patrick M...          1958   
1  ['Rebecca Ford', 'Vanessa Lozano', 'Katelyn Ja...          1972   
2  ['Mr. Dennis Davis', 'Arthur Curtis', 'An

In [None]:
# Prepare data for Surprise library
reader = Reader(rating_scale=(1, 5))
# Reuse the join that was already computed into merged_df instead of executing
# the whole Dask graph (read + merge) a second time for three columns.
ratings_pd = merged_df[['user_id', 'movie_id', 'rating']]
data = Dataset.load_from_df(ratings_pd, reader)

# Fixed seed so the split, the SVD initialisation and therefore the reported
# RMSE are the same on every run.
MODEL_SEED = 42

# Split the data into train and test sets (25% held out for evaluation)
trainset, testset = train_test_split(data, test_size=0.25, random_state=MODEL_SEED)

# Train the SVD algorithm on the trainset
algo = SVD(random_state=MODEL_SEED)
algo.fit(trainset)

# Predict ratings for the testset
predictions = algo.test(testset)

# Compute and print RMSE
accuracy.rmse(predictions)

RMSE: 1.5125


1.5125206545098993

In [None]:

def display_genres():
    """Print the module-level ``genres`` list as a numbered grid table (numbering starts at 1)."""
    numbered_rows = [[position, name] for position, name in enumerate(genres, start=1)]
    print(tabulate(numbered_rows, headers=["#", "Genre"], tablefmt="grid"))


def display_user_profile():
    """Print a one-row grid table summarising the global ``user_profile``.

    The first column shows the preferred genre, the second lists the liked
    movie titles with one ``genre: title, title`` line per genre. Either
    column shows the text "None" when the profile holds nothing for it.
    """
    genre_cell = user_profile["preferred_genre"] or "None"

    liked_by_genre = user_profile["liked_movies"]
    if liked_by_genre:
        liked_lines = [
            f"{genre}: {', '.join(titles)}"
            for genre, titles in liked_by_genre.items()
        ]
    else:
        liked_lines = ["None"]

    profile_row = [genre_cell, "\n".join(liked_lines)]
    print(tabulate([profile_row], headers=["Preferred Genre", "Liked Movies"], tablefmt="grid"))


In [None]:

def rate_movie(chosen_movie):
    """Ask the user to rate ``chosen_movie`` and record the result in ``user_profile``.

    The prompt repeats until an integer from 1 to 5 is entered; both
    non-numeric and out-of-range answers print the same hint before asking
    again.

    A rating of 3 or higher appends the movie's title to the liked list kept
    under the profile's current preferred genre. A lower rating appends the
    movie's ``movie_id`` to the disliked list. The updated profile is then
    printed.

    Args:
        chosen_movie: mapping that provides at least ``'title'`` and
            ``'movie_id'``.
    """
    valid_ratings = range(1, 6)
    while True:
        try:
            rating = int(input(f"Rate the movie '{chosen_movie['title']}' (1-5): "))
        except ValueError:
            rating = None
        if rating in valid_ratings:
            break
        # Reached for non-numeric input and for numbers outside 1-5, so the
        # user always learns why the question is being asked again.
        print("Invalid input. Please enter a number between 1 and 5.")

    preferred_genre = user_profile["preferred_genre"]
    if rating >= 3:
        user_profile["liked_movies"].setdefault(preferred_genre, []).append(chosen_movie['title'])
    else:
        user_profile["disliked_movies"].append(chosen_movie['movie_id'])

    print("Your preferences have been updated.")
    display_user_profile()

def choose_movie_to_watch(movies):
    """Show ``movies`` as a numbered table and return the one the user picks.

    The prompt repeats until the user enters a number between 1 and
    ``len(movies)``; non-numeric and out-of-range answers print a hint before
    asking again.

    Args:
        movies: non-empty sequence of mappings providing ``'title'``,
            ``'release_year'``, ``'average_rating'``, ``'plot_summary'``,
            ``'language'`` and ``'country'``.

    Returns:
        The element of ``movies`` selected by the user.

    Raises:
        ValueError: if ``movies`` is empty.
    """
    if len(movies) == 0:
        # With nothing to choose from no answer could ever be valid and the
        # prompt loop below would never terminate.
        raise ValueError("No movies to choose from.")

    table_data = [
        [position, movie['title'], movie['release_year'], movie['average_rating'], movie['plot_summary'], movie['language'], movie['country']]
        for position, movie in enumerate(movies, start=1)
    ]

    print("Here are some movies you might like:")
    print(tabulate(table_data, headers=["#", "Title", "Release Year", "Average Rating", "Plot Summary", "Language", "Country"], tablefmt="grid"))

    valid_choices = range(1, len(movies) + 1)
    while True:
        try:
            movie_choice = int(input(f"Choose a movie to watch (1-{len(movies)}): "))
        except ValueError:
            movie_choice = None
        if movie_choice in valid_choices:
            break
        # Covers both non-numeric answers and numbers outside the table.
        print(f"Invalid input. Please enter a number between 1 and {len(movies)}.")

    chosen_movie = movies[movie_choice - 1]
    print(f"You chose to watch: {chosen_movie['title']}")
    return chosen_movie

Available genres:
+-----+----------+
|   # | Genre    |
|   1 | Action   |
+-----+----------+
|   2 | Comedy   |
+-----+----------+
|   3 | Drama    |
+-----+----------+
|   4 | Fantasy  |
+-----+----------+
|   5 | Horror   |
+-----+----------+
|   6 | Romance  |
+-----+----------+
|   7 | Sci-Fi   |
+-----+----------+
|   8 | Thriller |
+-----+----------+
Enter your preferred genre: Action
Here are some movies you might like:
+-----+------------------------+----------------+------------------+--------------------------------------------------------------------------------------------------------------------+-------------+-----------------------+
|   # | Title                  |   Release Year |   Average Rating | Plot Summary                                                                                                       | Language    | Country               |
|   1 | Appear red             |           1958 |              1.3 | Economic key behind society.                       

KeyboardInterrupt: Interrupted by user

In [None]:
# Mutable session state for the interactive user
user_profile = dict(
    preferred_genre=None,  # genre string typed by the user; None until asked
    liked_movies={},       # genre -> list of titles the user rated 3 or higher
    disliked_movies=[],    # movie_ids the user rated below 3
)

def get_recommendations(user_id, preferred_genre=None, num_recommendations=5):
    """Return the top predicted movies for ``user_id`` as a list of dicts.

    Candidate movies come from ``movies_dd`` (restricted to
    ``preferred_genre`` when one is given). Movies the profile marks as
    disliked, and movies whose title appears in any liked list, are removed.
    The remaining candidates are scored with ``algo.predict`` and the
    ``num_recommendations`` highest estimates are returned, best first.

    Args:
        user_id: raw user id handed to ``algo.predict``.
        preferred_genre: optional genre label used to filter candidates.
        num_recommendations: maximum number of movies to return.

    Returns:
        List of dicts with the keys movie_id, title, release_year,
        average_rating, director, actors, plot_summary, language and country,
        ordered from highest to lowest predicted rating. Empty when no
        candidate is left.
    """
    if preferred_genre:
        candidates = movies_dd[movies_dd['genres'] == preferred_genre]
    else:
        candidates = movies_dd
    movie_ids = candidates['movie_id'].compute().tolist()

    # Exclude disliked movies and liked movies. Liked movies are stored by
    # title, so every movie sharing a liked title is excluded. All titles are
    # resolved with a single Dask computation rather than one per genre.
    excluded_movies = set(user_profile["disliked_movies"])
    liked_titles = [
        title
        for titles in user_profile["liked_movies"].values()
        for title in titles
    ]
    if liked_titles:
        excluded_movies.update(
            movies_dd[movies_dd['title'].isin(liked_titles)]['movie_id'].compute().tolist()
        )

    user_ratings = [
        (iid, algo.predict(user_id, iid).est)
        for iid in movie_ids
        if iid not in excluded_movies
    ]

    # Sort by predicted rating in descending order (stable, so ties keep
    # their catalogue order)
    user_ratings.sort(key=lambda scored: scored[1], reverse=True)

    # Get top N recommendations
    recommended_ids = [iid for iid, _ in user_ratings[:num_recommendations]]
    recommended_movies = movies_dd[movies_dd['movie_id'].isin(recommended_ids)].compute()

    # Convert to list of dictionaries
    recommendations = [
        {
            'movie_id': movie['movie_id'],
            'title': movie['title'],
            'release_year': movie['release_year'],
            'average_rating': movie['average_rating'],
            'director': movie['director'],
            'actors': movie['actors'],
            'plot_summary': movie['plot_summary'],
            'language': movie['language'],
            'country': movie['country']
        }
        for _, movie in recommended_movies.iterrows()
    ]

    # The isin() filter returns rows in file order, which would throw away the
    # ranking computed above; put the records back into best-first order.
    rank_by_id = {iid: position for position, iid in enumerate(recommended_ids)}
    recommendations.sort(key=lambda movie: rank_by_id[movie['movie_id']])
    return recommendations

In [None]:

def recommend_movies(user_id=-1):
    """Run one interactive recommendation round for the global ``user_profile``.

    Flow:
      1. Ask for a preferred genre if the profile has none.
      2. Offer movies. Once the user has more than one liked movie in the
         genre the offers come from ``get_recommendations``; before that,
         up to three random movies of the genre are offered.
      3. Let the user pick and rate movies from the offered list until the
         list is exhausted or the user declines.
      4. Optionally change genre and start another round.

    Args:
        user_id: raw user id passed to ``get_recommendations``. The default
            of -1 is an id ``generate_ratings`` never produces, so the
            session is not tied to one of the synthetic users.
            NOTE(review): this relies on the model accepting ids that were
            not in its training data - confirm against the Surprise docs.
    """
    current_recommendations = None

    if user_profile["preferred_genre"] is None:
        print("Available genres:")
        display_genres()
        user_profile["preferred_genre"] = input("Enter your preferred genre: ")

    preferred_genre = user_profile["preferred_genre"]
    genre_movies = movies_dd[movies_dd['genres'] == preferred_genre].compute()
    if genre_movies.empty:
        print("No movies found in this genre.")
        # Forget the unusable genre; otherwise every later call would skip
        # the prompt and fail on the same value again.
        user_profile["preferred_genre"] = None
        return

    while True:
        if current_recommendations is None:
            liked_in_genre = user_profile["liked_movies"].get(preferred_genre, [])
            if len(liked_in_genre) > 1:
                print(f"Based on your interest in {preferred_genre} movies like {user_profile['liked_movies'][preferred_genre]}, we recommend:")
                current_recommendations = get_recommendations(user_id, preferred_genre)
                if not current_recommendations:
                    print("No recommendations found.")
                    break
            else:
                # Not enough feedback yet: offer random movies of the genre.
                # sample() raises when asked for more rows than exist, so cap
                # the request for genres with fewer than three movies.
                sample_size = min(3, len(genre_movies))
                current_recommendations = genre_movies.sample(sample_size).to_dict(orient='records')

        chosen_movie = choose_movie_to_watch(current_recommendations)
        rate_movie(chosen_movie)

        # Remove the chosen movie from the current recommendations
        current_recommendations = [movie for movie in current_recommendations if movie['movie_id'] != chosen_movie['movie_id']]

        if not current_recommendations or input("Do you want to choose another movie from the current recommendations? (yes/no): ").lower() != "yes":
            break

    if input("Do you want more recommendations? (yes/no): ").lower() == "yes":
        if input("Do you want to change genre? (yes/no): ").lower() == "yes":
            print("Available genres:")
            display_genres()
            user_profile["preferred_genre"] = input("Enter your new preferred genre: ")
        recommend_movies(user_id)

# Start the interactive session; this cell blocks on input() prompts until the user finishes.
recommend_movies()
