In [None]:
import requests
from bs4 import BeautifulSoup
import csv
import os

In [None]:
def _extract_score(soup, css_classes):
    """Return the text of the score <span> inside the <div> matching *css_classes*.

    Returns "N/A" when either the <div> or the <span> is missing, so one
    absent score never prevents the other one from being read.
    """
    container = soup.find('div', class_=css_classes)
    if container is None:
        return "N/A"
    # NOTE(review): 'data-v-e408cafe' looks like a build-generated attribute
    # and may change when the site is redeployed -- confirm periodically.
    score_span = container.find('span', {'data-v-e408cafe': True})
    return score_span.text if score_span is not None else "N/A"


def get_metacritic(url):
    """Scrape the critic metascore and the user score from a Metacritic page.

    Args:
        url: Address of the Metacritic page to fetch.

    Returns:
        A ``(metascore, userscore)`` tuple of strings. Each element is
        "N/A" when that score cannot be located on the page; both are
        "N/A" when fetching or parsing the page raises.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
    }

    try:
        soup = fetch_movies(url, headers)

        # Each score is looked up independently: a missing metascore must not
        # discard a userscore that is present (and vice versa).
        metascore = _extract_score(
            soup,
            "c-siteReviewScore_background c-siteReviewScore_background-critic_medium",
        )
        userscore = _extract_score(
            soup,
            "c-siteReviewScore_background c-siteReviewScore_background-user",
        )

        return metascore, userscore
    except Exception as e:
        # Deliberate best-effort boundary: one bad page is reported and
        # turned into "N/A" values instead of aborting the caller.
        print(f"Error fetching data from {url}: {e}")
        return "N/A", "N/A"

def from_film_name_to_metacritic_url(film_name):
    """Build the Metacritic movie-page URL for a film title.

    The title is lower-cased, every space becomes a hyphen, and colons,
    apostrophes and periods are removed. All other characters are passed
    through unchanged.

    Args:
        film_name: Film title as a string.

    Returns:
        The URL string ``https://www.metacritic.com/movie/<slug>``.
    """
    # Single translation table: ' ' -> '-', and delete ':', "'" and '.'.
    slug_table = str.maketrans(" ", "-", ":'.")
    slug = film_name.lower().translate(slug_table)
    return "https://www.metacritic.com/movie/" + slug

def fetch_movies(url, headers, timeout=10):
    """Download *url* and parse the response body as HTML.

    Args:
        url: Address of the page to request.
        headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        A ``BeautifulSoup`` tree built from the response content with the
        built-in "html.parser".

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    # The HTTP status is not checked here: error pages are parsed as well
    # and callers simply find no matching elements in them.
    response = requests.get(url, headers=headers, timeout=timeout)
    return BeautifulSoup(response.content, "html.parser")

def check_film_exists(film_name, failedPath, timeout=10):
    """Check whether the Metacritic page for *film_name* responds with HTTP 200.

    Args:
        film_name: Film title; converted to a URL with
            ``from_film_name_to_metacritic_url``.
        failedPath: Path of a text file to which the title is appended
            (one title per line) when the page does not respond with 200.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        True when the response status is 200, otherwise False.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    film_url = from_film_name_to_metacritic_url(film_name)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
    }
    response = requests.get(film_url, headers=headers, timeout=timeout)
    if response.status_code == 200:
        return True

    # Record the title so that it can be inspected or retried later.
    with open(failedPath, mode='a') as file:
        file.write(f"{film_name}\n")
    return False

def crawl_failed_films(failedPath, outputPath):
    """Add Metacritic scores to every row of a CSV file and save a new CSV.

    The first row of the input is treated as the header and gains the
    columns "metascore" and "userscore". For each following row the second
    column is used as the film identifier, its scores are fetched with
    ``get_metacritic`` and appended to the row.

    Args:
        failedPath: Path of the input CSV file.
        outputPath: Path of the CSV file to write.

    Returns:
        None. When the input file has no header row, a notice is printed
        and no output file is written.
    """
    # All rows are collected first and written in one go at the end.
    updated_data = []

    with open(failedPath, 'r', newline='') as csv_file:
        reader = csv.reader(csv_file)

        # An empty file would otherwise leak StopIteration out of next().
        header = next(reader, None)
        if header is None:
            print(f'No data found in {failedPath}; nothing to update')
            return

        updated_data.append(header + ["metascore", "userscore"])

        for row in reader:
            # csv.reader yields [] for blank lines; they carry no data.
            if not row:
                continue

            # Without a second column there is no identifier to look up;
            # keep the row's data and mark both scores as unavailable.
            if len(row) < 2:
                print("Skipping lookup for row without identifier:", row)
                updated_data.append(row + ["N/A", "N/A"])
                continue

            identifier = row[1]
            metascore, userscore = get_metacritic(from_film_name_to_metacritic_url(identifier))

            updated_data.append(row + [metascore, userscore])

            print("Updated data for", identifier)

    # Write the updated data into a new CSV file
    with open(outputPath, 'w', newline='') as updated_csv_file:
        writer = csv.writer(updated_csv_file)
        writer.writerows(updated_data)

    print(f'Data has been updated and saved to {outputPath}')


In [None]:
# Specify the file paths. These statements run at the top level of the cell,
# so they must not be indented (an indented line here raises IndentationError).
failedPath = '../../Data/output.csv'  # Input CSV file
outputPath = '../../Data/updated_output.csv'  # Output CSV file with additional columns

# Read the input CSV, look up the scores of every row and write the result.
crawl_failed_films(failedPath, outputPath)