In [1]:
import os
import time
from concurrent.futures import ThreadPoolExecutor

import pandas as pd
import requests

# SECURITY NOTE(review): an API credential is committed in this file. Prefer
# supplying it through the TMDB_API_KEY environment variable; the literal below
# is kept only as a fallback so existing runs keep working, and should be
# rotated and removed once the environment variable is in place.
API_KEY = os.environ.get("TMDB_API_KEY") or "7b1e25c715e2b1fd047c1f76e5bf407f"

# Root of the TMDB v3 REST API; endpoint paths are appended to this.
BASE_URL = "https://api.themoviedb.org/3"

def fetch_genres(api_key, timeout=10):
    """Fetch the movie genre list and return it as an ``id -> name`` mapping.

    Args:
        api_key: API key sent as the ``api_key`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block forever on an unresponsive host.

    Returns:
        A dict mapping each genre id to its name. An empty dict is returned
        when the request fails or the response body is not valid JSON.
    """
    try:
        response = requests.get(
            f"{BASE_URL}/genre/movie/list",
            params={"api_key": api_key, "language": "en-US"},
            timeout=timeout,
        )
        response.raise_for_status()
        data = response.json()
    # ValueError covers a non-JSON body on requests versions whose
    # JSONDecodeError does not derive from RequestException.
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Error fetching genres: {e}")
        return {}

    # "or []" also guards against an explicit null "genres" value.
    return {genre["id"]: genre["name"] for genre in data.get("genres") or []}

def fetch_movies(api_key, category="top_rated", timeout=10):
    """Download every page of a movie list endpoint and return all results.

    Pages are requested sequentially starting at page 1, pausing half a
    second between requests. Fetching stops when a page has no results, when
    the page number reaches the ``total_pages`` value reported by the
    response (if present), or when a request fails.

    Args:
        api_key: API key sent as the ``api_key`` query parameter.
        category: Path segment appended to ``/movie/`` (e.g. ``"top_rated"``).
        timeout: Seconds to wait for each response before giving up.

    Returns:
        A list with the ``results`` entries of every page fetched
        successfully. Pages fetched before a failure are still returned.
    """
    all_movies = []
    page = 1

    while True:
        try:
            response = requests.get(
                f"{BASE_URL}/movie/{category}",
                params={"api_key": api_key, "language": "en-US", "page": page},
                timeout=timeout,
            )
            response.raise_for_status()
            data = response.json()
        # ValueError covers a non-JSON body on older requests versions.
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"Error fetching page {page}: {e}")
            break

        results = data.get("results")
        if not results:
            # An empty page means there is nothing left to fetch.
            break

        all_movies.extend(results)
        print(f"Page {page} of {category} fetched successfully.")

        # Stop on the last advertised page instead of issuing one more
        # request that can only come back empty or as an error.
        total_pages = data.get("total_pages")
        if isinstance(total_pages, int) and page >= total_pages:
            break

        page += 1

        # Simple client-side rate limiting between consecutive requests.
        time.sleep(0.5)

    return all_movies

def fetch_movie_details(api_key, movie_id, timeout=10):
    """Fetch the detail record for a single movie.

    Args:
        api_key: API key sent as the ``api_key`` query parameter.
        movie_id: Identifier appended to the ``/movie/`` path.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout one stalled request can block its caller indefinitely.

    Returns:
        The decoded JSON response, or ``None`` when the request fails or the
        body is not valid JSON.
    """
    try:
        response = requests.get(
            f"{BASE_URL}/movie/{movie_id}",
            params={"api_key": api_key, "language": "en-US"},
            timeout=timeout,
        )
        response.raise_for_status()
        return response.json()
    # ValueError covers a non-JSON body on older requests versions.
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Error fetching details for movie ID {movie_id}: {e}")
        return None

def fetch_movie_credits(api_key, movie_id, timeout=10):
    """Fetch the credits (cast and crew) record for a single movie.

    Args:
        api_key: API key sent as the ``api_key`` query parameter.
        movie_id: Identifier used in the ``/movie/{movie_id}/credits`` path.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout one stalled request can block its caller indefinitely.

    Returns:
        The decoded JSON response, or ``None`` when the request fails or the
        body is not valid JSON.
    """
    try:
        response = requests.get(
            f"{BASE_URL}/movie/{movie_id}/credits",
            params={"api_key": api_key},
            timeout=timeout,
        )
        response.raise_for_status()
        return response.json()
    # ValueError covers a non-JSON body on older requests versions.
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Error fetching credits for movie ID {movie_id}: {e}")
        return None

def enrich_movie_data(movie, genre_map):
    """Combine a list-endpoint movie entry with its details and credits.

    Args:
        movie: One entry from a movie list response (a dict).
        genre_map: Mapping of genre id to genre name, used to translate the
            entry's ``genre_ids``.

    Returns:
        A flat dict of scalar/string fields suitable for one table row, or
        ``None`` when the movie's details could not be fetched. ``cast`` and
        ``crew`` are ``None`` when the credits could not be fetched.
    """
    movie_id = movie.get("id")
    details = fetch_movie_details(API_KEY, movie_id)
    if not details:
        # Without details the movie is dropped, so skip the credits request.
        return None
    credits = fetch_movie_credits(API_KEY, movie_id)

    def join_names(items):
        # Comma-join the "name" of each entry, tolerating a missing/null list
        # and entries without a usable name, so one odd record cannot raise
        # and abort the whole batch.
        return ", ".join(item["name"] for item in items or [] if item.get("name"))

    def image_url(path):
        # Build a full w500 image URL, or None when there is no image path.
        return f"https://image.tmdb.org/t/p/w500{path}" if path else None

    # Ids that are unknown to genre_map are silently skipped.
    genre_names = [
        genre_map[genre_id]
        for genre_id in movie.get("genre_ids") or []
        if genre_map.get(genre_id)
    ]

    if credits:
        # Limit to the first 3 cast members.
        cast = join_names((credits.get("cast") or [])[:3])
        # Only the first crew member whose job is "Director".
        directors = [
            member
            for member in credits.get("crew") or []
            if member.get("job") == "Director"
        ]
        crew = join_names(directors[:1])
    else:
        cast = None
        crew = None

    return {
        "id": movie.get("id"),
        "title": movie.get("title"),
        "release_date": movie.get("release_date"),
        "vote_average": movie.get("vote_average"),
        "overview": movie.get("overview"),
        "poster_path": image_url(movie.get("poster_path")),
        "original_language": movie.get("original_language"),
        "genre_names": ", ".join(genre_names),  # Comma-separated genre names
        "popularity": movie.get("popularity"),
        "vote_count": movie.get("vote_count"),
        "backdrop_path": image_url(movie.get("backdrop_path")),
        "original_title": details.get("original_title"),
        "adult": details.get("adult"),
        "video": details.get("video"),
        "production_companies": join_names(details.get("production_companies")),
        "production_countries": join_names(details.get("production_countries")),
        "runtime": details.get("runtime"),
        "spoken_languages": join_names(details.get("spoken_languages")),
        "budget": details.get("budget"),
        "revenue": details.get("revenue"),
        "status": details.get("status"),
        "tagline": details.get("tagline"),
        "homepage": details.get("homepage"),
        "imdb_id": details.get("imdb_id"),
        "cast": cast,
        "crew": crew,
    }

# Look up the genre id -> name table once; every movie shares it.
genre_map = fetch_genres(API_KEY)

# Pull the complete "top_rated" listing.
movies = fetch_movies(API_KEY, category="top_rated")
print(f"Fetched {len(movies)} movies.")

# Enrich the movies on a pool of 10 worker threads, handing each call the
# same genre_map, and keep only the movies that were enriched successfully.
with ThreadPoolExecutor(max_workers=10) as executor:
    enriched_movies = [
        enriched
        for enriched in executor.map(
            enrich_movie_data, movies, [genre_map] * len(movies)
        )
        if enriched is not None
    ]

print(f"Enriched data for {len(enriched_movies)} movies.")

def save_movies_to_csv(movies, filename="tmdb_movies.csv"):
    """Write movie records to a UTF-8 CSV file without an index column.

    Args:
        movies: Records to write, in any form ``pd.DataFrame`` accepts
            (e.g. a list of dicts). When empty/falsy nothing is written.
        filename: Destination path of the CSV file.

    Returns:
        None. A status message is printed in either case.
    """
    if movies:
        pd.DataFrame(movies).to_csv(filename, index=False, encoding="utf-8")
        print(f"Movies saved to '{filename}'.")
    else:
        print("No movies to save.")

# Write the enriched movies to the function's default CSV file.
save_movies_to_csv(enriched_movies)

Page 1 of top_rated fetched successfully.
Page 2 of top_rated fetched successfully.
Page 3 of top_rated fetched successfully.
Page 4 of top_rated fetched successfully.
Page 5 of top_rated fetched successfully.
Page 6 of top_rated fetched successfully.
Page 7 of top_rated fetched successfully.
Page 8 of top_rated fetched successfully.
Page 9 of top_rated fetched successfully.
Page 10 of top_rated fetched successfully.
Page 11 of top_rated fetched successfully.
Page 12 of top_rated fetched successfully.
Page 13 of top_rated fetched successfully.
Page 14 of top_rated fetched successfully.
Page 15 of top_rated fetched successfully.
Page 16 of top_rated fetched successfully.
Page 17 of top_rated fetched successfully.
Page 18 of top_rated fetched successfully.
Page 19 of top_rated fetched successfully.
Page 20 of top_rated fetched successfully.
Page 21 of top_rated fetched successfully.
Page 22 of top_rated fetched successfully.
Page 23 of top_rated fetched successfully.
Page 24 of top_rated