# 1. Introduction to MetaCritic data
- MetaCritic is a widely recognized platform that aggregates reviews and ratings for movies, TV shows, video games, music, and books. Launched in 2001, MetaCritic consolidates professional critic reviews and user ratings to provide a weighted average score, known as the MetaScore, which reflects the overall reception of a piece of media.
- Purpose: To get a broader view of how each movie was judged, we crawl another reliable movie/game rating website, MetaCritic (https://www.metacritic.com/movie).

# 2. Libraries

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import csv


# 3. Data Collection

- Get data function:
    - As with the Investopedia website, we need to send a `User-Agent` header when requesting pages from the MetaCritic website.
    - API template: https://www.metacritic.com/movie/{film_name}/
        - Parameters:
            - film_name: name of film.
    - In this function, we will get scores from users and critics.

In [None]:
def _extract_score(soup, background_class):
    """Return the score text inside the div with *background_class*, or "N/A".

    "N/A" is returned when either the div or its score span is not found,
    instead of letting ``None.find`` / ``None.text`` raise ``AttributeError``.
    """
    box = soup.find('div', class_=background_class)
    if box is None:
        return "N/A"
    # NOTE(review): `data-v-e408cafe` looks like a build-generated attribute;
    # it may change when the site is redeployed -- confirm before relying on it.
    span = box.find('span', {'data-v-e408cafe': True})
    if span is None:
        return "N/A"
    return span.text


def get_metacritic(url):
    """Scrape the critic and user scores from a MetaCritic movie page.

    Args:
        url: Full URL of the movie page to download.

    Returns:
        A ``(metascore, userscore)`` tuple of strings read from the page.
        Either element is ``"N/A"`` when the corresponding score element is
        not present in the downloaded HTML.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
    }
    soup = fetch_movies(url, headers)

    # div containing the critic score (metascore)
    metascore = _extract_score(
        soup,
        "c-siteReviewScore_background c-siteReviewScore_background-critic_medium",
    )

    # div containing the user score
    userscore = _extract_score(
        soup,
        "c-siteReviewScore_background c-siteReviewScore_background-user",
    )
    return metascore, userscore

def from_film_name_to_metacritic_url(film_name):
    """Build the MetaCritic movie-page URL for a film title.

    The title is lower-cased, every space becomes a hyphen, and the
    characters ``:``, ``'`` and ``.`` are dropped. All other characters are
    passed through unchanged.

    Args:
        film_name: Film title as a string.

    Returns:
        The URL string ``https://www.metacritic.com/movie/<slug>/``.
    """
    # One translation pass: map space -> "-", delete ":", "'" and ".".
    slug_table = str.maketrans({" ": "-", ":": None, "'": None, ".": None})
    slug = film_name.lower().translate(slug_table)
    return f"https://www.metacritic.com/movie/{slug}/"

def fetch_movies(url, headers, timeout=10):
    """Download a page and parse it into a BeautifulSoup tree.

    Args:
        url: Address of the page to download.
        headers: Dict of HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the crawl indefinitely.

    Returns:
        A ``BeautifulSoup`` object built from the response body with the
        ``html.parser`` backend. The HTTP status code is not checked here.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    return BeautifulSoup(response.content, "html.parser")

def check_film_exists(film_name, failedPath):
    """Check whether a film's MetaCritic page answers with HTTP 200.

    When the page does not answer with 200, or the request itself fails,
    the film name is appended as one line to the file at ``failedPath``.

    Args:
        film_name: Film title; converted to a URL with
            ``from_film_name_to_metacritic_url``.
        failedPath: Path of the text file collecting failed film names.

    Returns:
        ``True`` if the response status code is 200, otherwise ``False``.
    """
    film_url = from_film_name_to_metacritic_url(film_name)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
    }
    try:
        # A timeout keeps one stalled connection from hanging the whole crawl.
        response = requests.get(film_url, headers=headers, timeout=10)
        found = response.status_code == 200
    except requests.RequestException:
        # Network errors and timeouts are recorded as failures rather than
        # aborting the caller's loop.
        found = False

    if not found:
        # Explicit UTF-8 so titles with non-ASCII characters can be written
        # regardless of the platform's default encoding.
        with open(failedPath, mode='a', encoding='utf-8') as file:
            file.write(f"{film_name}\n")
    return found

- Save the data to a CSV file

In [None]:
# Crawl driver: read the film titles, look each one up on MetaCritic, and
# append one row per film to the results CSV as soon as it is known.
outputPath = "../../Data/results.csv"
inputPath = "../../Data/final_movies.csv"
# NOTE(review): the leading "/" makes this an absolute path at the filesystem
# root -- confirm that is the intended location for the fail list.
failedPath = "/failed.txt"
df = pd.read_csv(inputPath)

# init results csv (explicit UTF-8 so non-ASCII titles can be written on any
# platform)
with open(outputPath, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    # field names
    writer.writerow(["Title", "Metascore", "Userscore"])

# init fail list
with open(failedPath, mode='w', encoding='utf-8') as file:
    file.write("Film \n")

film_list = df['Title'].tolist()
for film in film_list:
    # Placeholder values used whenever the scores cannot be obtained.
    metascore = "N/A"
    userscore = "N/A"
    if check_film_exists(film, failedPath):
        try:
            metascore, userscore = get_metacritic(from_film_name_to_metacritic_url(film))
        except AttributeError:
            # The page loaded but an expected score element was missing;
            # keep the placeholders instead of aborting the whole crawl.
            metascore = "N/A"
            userscore = "N/A"
    # write results to csv; the file is reopened per row so rows already
    # written are on disk if a later film fails
    with open(outputPath, mode='a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow([film, metascore, userscore])