# BetaSeries Recommender System

In [102]:
import os
import time

import pandas as pd
import requests
from dotenv import load_dotenv

In [7]:
# Configuration
# load_dotenv() runs first so the environment lookups below can see
# whatever it loaded.
load_dotenv()
API_KEY = os.environ.get("API_KEY")
ACCESS_TOKEN = os.environ.get("ACCESS_TOKEN")

# Endpoint root and client identification sent with every request.
BASE_URL = "https://api.betaseries.com"
API_VERSION = 3.0
USER_AGENT = "movie_recommender_system/1.0"

# Headers shared by every API call; per-client credentials are added later.
COMMON_HEADERS = {
    "X-BetaSeries-Version": f"{API_VERSION}",
    "User-Agent": USER_AGENT,
}

In [127]:

class BetaSeriesAPI:
    """Small client for the BetaSeries HTTP API.

    Each request is a GET against ``BASE_URL`` carrying ``COMMON_HEADERS``,
    the API key and, when one was supplied, a bearer access token.
    """

    def __init__(self, api_key, access_token=None, timeout=10):
        """Store the credentials and network settings used by every request.

        Args:
            api_key: Value sent in the ``X-BetaSeries-Key`` header.
            access_token: Optional token sent as ``Authorization: Bearer <token>``.
            timeout: Seconds passed to ``requests.get`` as its ``timeout``;
                ``None`` waits indefinitely.
        """
        self.api_key = api_key
        self.access_token = access_token
        self.timeout = timeout

    def _get_headers(self):
        """Return a fresh header dict: common headers plus credentials."""
        # Copy so the module-level COMMON_HEADERS is never mutated.
        headers = COMMON_HEADERS.copy()
        headers["X-BetaSeries-Key"] = self.api_key
        if self.access_token:
            headers["Authorization"] = f"Bearer {self.access_token}"
        return headers

    def get_shows_list(self, fields=None, limit=100, order=None):
        """Request ``/shows/list`` and return the decoded JSON body.

        Args:
            fields: Optional iterable of field names, sent comma-joined.
            limit: Maximum number of results; omitted from the query when falsy.
            order: Optional ordering key; omitted from the query when falsy.
        """
        endpoint = "/shows/list"
        params = {}
        if fields:
            params["fields"] = ",".join(fields)
        if limit:
            params["limit"] = limit
        if order:
            params["order"] = order

        response = self._make_get_request(endpoint, params)
        return response.json()

    def get_movies_list(self, limit=100, order=None):
        """Request ``/movies/list`` and return the decoded JSON body.

        Args:
            limit: Maximum number of results; omitted from the query when falsy.
            order: Optional ordering key; omitted from the query when falsy.
        """
        endpoint = "/movies/list"
        params = {}
        if limit:
            params["limit"] = limit
        if order:
            params["order"] = order

        response = self._make_get_request(endpoint, params)
        return response.json()

    def get_movie_details(self, movie_id):
        """Request ``/movies/movie`` for ``movie_id`` and return the decoded JSON body."""
        endpoint = "/movies/movie"
        params = {"id": movie_id}

        response = self._make_get_request(endpoint, params)
        return response.json()

    def _make_get_request(self, endpoint, params=None):
        """Send an authenticated GET to ``BASE_URL + endpoint``.

        Args:
            endpoint: Path appended to ``BASE_URL`` (e.g. ``"/shows/list"``).
            params: Optional query-string parameters.

        Returns:
            The ``requests`` response object when the status code is 200.

        Raises:
            ValueError: If the server answers with any status other than 200.
            requests.exceptions.RequestException: Propagated from ``requests``
                on network failures, including a timeout.
        """
        url = f"{BASE_URL}{endpoint}"
        # A timeout keeps a stalled connection from blocking the caller forever.
        response = requests.get(
            url,
            headers=self._get_headers(),
            params=params,
            timeout=self.timeout,
        )
        if response.status_code != 200:
            msg = f"Error {response.status_code}: {response.text}"
            raise ValueError(msg)
        return response

In [121]:
# Build a client from the credentials read in the configuration cell
api_key = API_KEY
access_token = ACCESS_TOKEN
client = BetaSeriesAPI(api_key, access_token)

# Fetch the movies data
movies_data = client.get_movies_list(limit=1000, order="popularity")

# Extract the required data from the movies.
# One extra request per movie is made to read its rating details.
extracted_data = []
for movie in movies_data.get("movies", []):
    title = movie.get("title")
    movie_id = movie.get("id")
    # Fall back to empty dicts (not a list) so the chained .get() calls
    # below cannot fail when "movie" or "notes" is missing or null.
    movie_details = client.get_movie_details(movie_id).get("movie") or {}
    notes = movie_details.get("notes") or {}
    total_notes = notes.get("total")
    mean_notes = notes.get("mean")

    extracted_data.append({
        "title": title,
        "total_notes": total_notes,
        "mean_notes": mean_notes,
    })
# Convert the extracted data to a DataFrame
df_movies = pd.DataFrame(extracted_data)

In [124]:
# Weighted rating per movie: (R*v + C*m) / (v + m)
v = df_movies["total_notes"]  # number of ratings for each title
R = df_movies["mean_notes"]  # mean rating of each title
C = R.mean()  # mean of the per-title mean ratings
# m is the vote count of the last row among the (up to) 250 most-rated titles
m = df_movies.nlargest(250, "total_notes")["total_notes"].iloc[-1]

df_movies["weighted_average"] = (R * v + C * m) / (v + m)

# Display the 50 titles with the highest weighted rating
df_movies.sort_values(by="weighted_average", ascending=False).head(50)



Unnamed: 0,title,total_notes,mean_notes,weighted_average
407,Le Seigneur des anneaux : Le Retour du roi,7832,4.56,4.342297
436,La Ligne verte,5529,4.64,4.334791
322,Le Seigneur des anneaux : La Communauté de l'a...,8381,4.51,4.314355
355,Le Seigneur des anneaux : Les Deux Tours,6704,4.52,4.285487
372,Forrest Gump,6427,4.5,4.263413
412,Le Roi lion,6525,4.47,4.243945
161,Harry Potter à l'école des sorciers,10664,4.36,4.225688
359,Fight Club,5536,4.48,4.222585
324,The Dark Knight : Le Chevalier noir,5814,4.44,4.202838
340,Inception,8989,4.34,4.189958


In [125]:
if __name__ == "__main__":
    # Client built from the credentials read in the configuration cell
    api_key = API_KEY
    access_token = ACCESS_TOKEN
    client = BetaSeriesAPI(api_key, access_token)

    # Request only the fields used below, ordered by popularity
    fields_to_fetch = ["title", "notes"]
    shows_data = client.get_shows_list(
        fields=fields_to_fetch,
        limit=5000,
        order="popularity",
    )

    # Keep one row per show: title, number of ratings, mean rating
    extracted_data = []
    for show in shows_data.get("shows", []):
        title = show.get("title")
        total_notes = show.get("notes", {}).get("total")
        mean_notes = show.get("notes", {}).get("mean")

        row = dict(title=title, total_notes=total_notes, mean_notes=mean_notes)
        extracted_data.append(row)

    # Tabulate the rows
    df_popularity = pd.DataFrame(extracted_data)


In [126]:
# Drop shows with an empty title or no ratings at all.
# .copy() makes the result an independent frame, so adding columns to it
# later does not trigger pandas' SettingWithCopyWarning.
has_title = df_popularity["title"] != ""
has_votes = df_popularity["total_notes"] != 0
df_popularity = df_popularity[has_title & has_votes].copy()
df_popularity.sort_values(by=["total_notes"], ascending=False)


Unnamed: 0,title,total_notes,mean_notes
137,Game of Thrones,19513,4.67053
150,Breaking Bad,12268,4.72380
378,How I Met Your Mother,8894,4.37643
232,The Big Bang Theory,8472,4.53550
163,The Walking Dead,8286,4.38414
...,...,...,...
315,The Walking Dead: The Ones Who Live,1,5.00000
4379,The Exodite,1,5.00000
3879,Lac-Mégantic: ceci n'est pas un accident,1,4.00000
4375,Taboo,1,4.00000


![image.png](http://trailerpark.weebly.com/uploads/8/8/5/5/8855465/7628808.png?371)

In [128]:
# Weighted rating per show: (R*v + C*m) / (v + m)
v = df_popularity["total_notes"]  # number of ratings for each show
R = df_popularity["mean_notes"]  # mean rating of each show
C = R.mean()  # mean of the per-show mean ratings
# m is the vote count of the last row among the (up to) 250 most-rated shows
m = df_popularity.nlargest(250, "total_notes")["total_notes"].iloc[-1]

df_popularity["weighted_average"] = (R * v + C * m) / (v + m)

# Display the 20 shows with the highest weighted rating
df_popularity.sort_values(by="weighted_average", ascending=False).head(20)

Unnamed: 0,title,total_notes,mean_notes,weighted_average
150,Breaking Bad,12268,4.7238,4.655216
137,Game of Thrones,19513,4.67053,4.629165
377,Sherlock,6746,4.7139,4.599943
369,Chernobyl,5941,4.69938,4.575408
1000,Kaamelott,4034,4.7169,4.544418
222,Friends,6731,4.628,4.527861
201,Le Jeu de la dame,7210,4.57073,4.485046
133,Peaky Blinders,3553,4.65635,4.483169
768,Doctor Who (2005),2629,4.71282,4.480237
232,The Big Bang Theory,8472,4.5355,4.465597


# The same but ordered by followers instead of popularity

In [None]:
if __name__ == "__main__":
    # Client built from the credentials read in the configuration cell
    api_key = API_KEY
    access_token = ACCESS_TOKEN
    client = BetaSeriesAPI(api_key, access_token)

    # Request only the fields used below, ordered by followers
    fields_to_fetch = ["title", "notes"]
    shows_data = client.get_shows_list(
        fields=fields_to_fetch,
        limit=5000,
        order="followers",
    )

    # Keep one row per show: title, number of ratings, mean rating
    extracted_data = []
    for show in shows_data.get("shows", []):
        title = show.get("title")
        total_notes = show.get("notes", {}).get("total")
        mean_notes = show.get("notes", {}).get("mean")

        row = dict(title=title, total_notes=total_notes, mean_notes=mean_notes)
        extracted_data.append(row)

    # Tabulate the rows
    df_followers = pd.DataFrame(extracted_data)

In [72]:
# Drop shows with an empty title or no ratings at all.
# .copy() makes the result an independent frame, so adding columns to it
# later does not trigger pandas' SettingWithCopyWarning.
has_title = df_followers["title"] != ""
has_votes = df_followers["total_notes"] != 0
df_followers = df_followers[has_title & has_votes].copy()
df_followers.sort_values(by=["total_notes"], ascending=False)

Unnamed: 0,title,total_notes,mean_notes
0,Game of Thrones,19513,4.67053
4,Breaking Bad,12268,4.72380
8,How I Met Your Mother,8894,4.37643
6,The Big Bang Theory,8472,4.53550
1,The Walking Dead,8286,4.38414
...,...,...,...
3429,Bheem Bam Boum,1,2.00000
3227,ARP Backstage Pass,1,3.00000
4489,Naruto Hichou,1,5.00000
3625,America's Funniest Home Videos,1,5.00000


In [80]:
# Weighted rating per show: (R*v + C*m) / (v + m)
v = df_followers["total_notes"]  # number of ratings for each show
R = df_followers["mean_notes"]  # mean rating of each show
C = R.mean()  # mean of the per-show mean ratings
# m is the vote count of the last row among the (up to) 250 most-rated shows
m = df_followers.nlargest(250, "total_notes")["total_notes"].iloc[-1]

df_followers["weighted_average"] = (R * v + C * m) / (v + m)

In [83]:
# Display the 20 shows with the highest weighted rating
ranked_followers = df_followers.sort_values(by="weighted_average", ascending=False)
ranked_followers.head(20)

Unnamed: 0,title,total_notes,mean_notes,weighted_average
4,Breaking Bad,12268,4.7238,4.648773
0,Game of Thrones,19513,4.67053,4.624963
29,Sherlock,6746,4.7139,4.589085
65,Chernobyl,5941,4.69938,4.563346
96,Kaamelott,4034,4.7169,4.528054
32,Friends,6731,4.628,4.516983
38,Le Jeu de la dame,7210,4.57073,4.47478
43,Peaky Blinders,3553,4.65635,4.465188
120,Doctor Who (2005),2629,4.71282,4.458042
6,The Big Bang Theory,8472,4.5355,4.456654
