In [41]:
# Write a Python script to download IMDB 250 Top Rated Movies.
# For each movie, you'll need to retrieve the movie title, the ranking, the initial release year, and
# the rating.
# Your data must be stored in a proper imdb_top_250.csv file.

import requests, csv
from bs4 import BeautifulSoup
import re

# Matches text that is nothing but a four-digit year (1800-2099).
_YEAR_TEXT = re.compile(r"^\s*(?:18|19|20)\d{2}\s*$")


def _text_or_na(tag):
    """Return the stripped text of a BeautifulSoup tag, or "N/A" if the tag is missing."""
    return tag.text.strip() if tag is not None else "N/A"


def _split_rank_and_title(heading_text):
    """Split a heading of the form "<rank>. <title>" into ``(rank, title)``.

    Returns ``("N/A", heading_text)`` when the heading carries no leading
    "<number>." prefix, instead of raising while converting the rank.
    """
    match = re.match(r"^(\d+)\.\s*(.*)$", heading_text, re.DOTALL)
    if match:
        return int(match.group(1)), match.group(2)
    return "N/A", heading_text


def _find_year(movie):
    """Return the release-year text for one movie list item, or "N/A"."""
    tag = movie.select_one("span.sc-f30335b4-7")
    if tag is None:
        # NOTE(review): "sc-f30335b4-7" looks like an auto-generated class name
        # and may change between site releases. Fall back to the first <span>
        # whose whole text is a bare four-digit year -- confirm against the
        # live markup.
        tag = movie.find("span", string=_YEAR_TEXT)
    return _text_or_na(tag)


def get_movies(url, output_path="imdb_top_250.csv", timeout=10):
    """Download a movie chart page and save its entries to a CSV file.

    Each ``li.ipc-metadata-list-summary-item`` element found in the page
    becomes one row with the columns Title, Ranking, Year and Rating. Any
    field that cannot be found in the markup is written as "N/A".

    Args:
        url: Address of the chart page to fetch.
        output_path: Destination CSV file; defaults to "imdb_top_250.csv".
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None. Prints "Request successful" and writes the CSV when the server
        answers with HTTP 200; otherwise prints "Request failed" and writes
        nothing.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection error, timeout, ...).
    """
    # Set headers to mimic a browser request
    headers = {"User-Agent": "Mozilla/5.0"}

    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, headers=headers, timeout=timeout)

    if response.status_code != 200:
        print("Request failed")
        return
    print("Request successful")

    soup = BeautifulSoup(response.text, "html.parser")

    rows = []
    for movie in soup.select("li.ipc-metadata-list-summary-item"):
        heading = _text_or_na(movie.select_one("h3.ipc-title__text"))
        rank, title = _split_rank_and_title(heading)
        year = _find_year(movie)
        rating = _text_or_na(movie.select_one("span.ipc-rating-star--rating"))
        rows.append((title, rank, year, rating))

    # Explicit UTF-8 so titles with non-ASCII characters are written the same
    # way on every platform instead of depending on the locale default.
    with open(output_path, "w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["Title", "Ranking", "Year", "Rating"])
        writer.writerows(rows)

In [42]:
def main():
    """Run the scraper against the IMDb Top 250 chart page."""
    get_movies("https://www.imdb.com/chart/top")


# Only run the scraper when this code is executed directly, not on import.
if __name__ == "__main__":
    main()

Request successful
