In [None]:
%load_ext autoreload
%autoreload 2

In [1]:
from datetime import datetime, timezone, timedelta
import pytz
from bs4 import BeautifulSoup
import re
import pandas as pd
import time
import requests
import ScraperFC as sfc
import numpy as np
import os
from time import perf_counter
import warnings
# Suppress FutureWarning (from any library) and pandas PerformanceWarning for the whole session.
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
# Notebook display limits: up to 50 rows (also when truncated) and 50 columns per DataFrame repr.
pd.set_option('display.max_rows', 50)
pd.set_option('display.min_rows', 50)
pd.set_option('display.max_columns', 50)

In [2]:
def requests_get(url, wait_time=4, max_tries=3, retry_wait=5):
    """ Custom requests.get function for the FBRef module

    Sleeps `wait_time` seconds and then calls requests.get(). A response whose status code
    is not 200 is retried, but only until `max_tries` attempts have failed; at that point
    the last (non-200) response is returned as-is. Callers that need a successful page
    must therefore check `response.status_code` themselves.

    Args
    ----
    url : str
        The URL to get
    wait_time : int or float
        Seconds to sleep before every attempt (including the first one)
    max_tries : int
        Number of failed attempts after which the function stops retrying
    retry_wait : int or float
        Extra seconds to sleep after a failed attempt, before the next one
    Returns
    -------
    : requests.Response
        The first response with status code 200, or the last response received if every
        attempt failed
    """
    fail_count = 0
    while True:
        # Pause before every request, the first one included
        time.sleep(wait_time)
        response = requests.get(url)
        if response.status_code == 200:
            return response

        # Anything other than 200 (e.g. 403 forbidden, 404 not found, 500 server error)
        # counts as a failed attempt
        fail_count += 1
        if fail_count >= max_tries:
            # Give up and hand the failing response back to the caller
            return response
        time.sleep(retry_wait)

In [3]:
# TODO: https://www.sportsmole.co.uk/football/ looks like a decent source for scraping
# extra-time information and match reports; not implemented yet.
def view_source_comp_info() -> dict:
    """Build and return the table of competitions known for each data source.

    The result is a nested dict ``{source: {competition: info}}``. Every competition's
    ``info`` holds a "first valid year"; FBRef and Oddsportal entries additionally hold a
    "url" and a "finder" (a list of strings for FBRef, a single string for Oddsportal).
    The "All" source is present but empty.

    A brand-new dict is assembled on every call, so a caller may modify the returned
    object without affecting later calls.
    """

    def years_only(first_years):
        # Sources for which only the first valid year is recorded
        return {comp: {"first valid year": first_year} for comp, first_year in first_years.items()}

    # Each FBRef row is (competition, first valid year, url, finder):
    #   - first valid year comes from the competition seasons history page on fbref
    #   - url points to that season history page
    #   - finder is used to find the season and match links in HTML
    fbref_rows = [
        # ---- Men's club international cups ----
        ("Copa Libertadores", 2014, "https://fbref.com/en/comps/14/history/Copa-Libertadores-Seasons", ["Copa-Libertadores"]),
        ("Champions League", 1991, "https://fbref.com/en/comps/8/history/Champions-League-Seasons", ["European-Cup", "Champions-League"]),
        ("Europa League", 1991, "https://fbref.com/en/comps/19/history/Europa-League-Seasons", ["UEFA-Cup", "Europa-League"]),
        ("Europa Conference League", 2022, "https://fbref.com/en/comps/882/history/Europa-Conference-League-Seasons", ["Europa-Conference-League"]),
        # ---- Men's national team competitions ----
        ("World Cup", 1930, "https://fbref.com/en/comps/1/history/World-Cup-Seasons", ["World-Cup"]),
        ("Copa America", 2015, "https://fbref.com/en/comps/685/history/Copa-America-Seasons", ["Copa-America"]),
        ("Euros", 2000, "https://fbref.com/en/comps/676/history/European-Championship-Seasons", ["UEFA-Euro", "European-Championship"]),
        # ---- Men's big 5 ----
        ("Big 5 combined", 1996, "https://fbref.com/en/comps/Big5/history/Big-5-European-Leagues-Seasons", ["Big-5-European-Leagues"]),
        ("EPL", 1993, "https://fbref.com/en/comps/9/history/Premier-League-Seasons", ["Premier-League"]),
        ("Ligue 1", 1996, "https://fbref.com/en/comps/13/history/Ligue-1-Seasons", ["Ligue-1", "Division-1"]),
        ("Ligue 2", 2009, "https://fbref.com/en/comps/60/history/Ligue-2-Seasons", ["Ligue-2"]),
        ("Bundesliga", 1989, "https://fbref.com/en/comps/20/history/Bundesliga-Seasons", ["Bundesliga"]),
        ("Bundesliga 2", 2003, "https://fbref.com/en/comps/33/history/2-Bundesliga-Seasons", ["Bundesliga"]),
        ("Serie A", 1989, "https://fbref.com/en/comps/11/history/Serie-A-Seasons", ["Serie-A"]),
        ("Serie B", 2014, "https://fbref.com/en/comps/18/history/Serie-B-Seasons", ["Serie-B"]),
        ("La Liga", 1989, "https://fbref.com/en/comps/12/history/La-Liga-Seasons", ["La-Liga"]),
        ("La Liga 2", 2001, "https://fbref.com/en/comps/17/history/Segunda-Division-Seasons", ["Segunda-Division"]),
        # ---- Men's domestic leagues - 1st tier ----
        ("MLS", 1996, "https://fbref.com/en/comps/22/history/Major-League-Soccer-Seasons", ["Major-League-Soccer"]),
        ("Brazilian Serie A", 2014, "https://fbref.com/en/comps/24/history/Serie-A-Seasons", ["Serie-A"]),
        ("Eredivisie", 2001, "https://fbref.com/en/comps/23/history/Eredivisie-Seasons", ["Eredivisie"]),
        ("Liga MX", 2004, "https://fbref.com/en/comps/31/history/Liga-MX-Seasons", ["Primera-Division", "Liga-MX"]),
        ("Primeira Liga", 2001, "https://fbref.com/en/comps/32/history/Primeira-Liga-Seasons", ["Primeira-Liga"]),
        # ---- Men's domestic league - 2nd tier ----
        ("EFL Championship", 2014, "https://fbref.com/en/comps/10/history/Championship-Seasons", ["First-Division", "Championship"]),
        # ---- Men's domestic league - 3rd tier and lower (none yet) ----
        # ---- Men's domestic cups (none yet) ----
        # ---- Women's international club competitions ----
        ("Women Champions League", 2015, "https://fbref.com/en/comps/181/history/Champions-League-Seasons", ["Champions-League"]),
        # ---- Women's national team competitions ----
        ("Womens World Cup", 1991, "https://fbref.com/en/comps/106/history/Womens-World-Cup-Seasons", ["Womens-World-Cup"]),
        ("Womens Euros", 2001, "https://fbref.com/en/comps/162/history/UEFA-Womens-Euro-Seasons", ["UEFA-Womens-Euro"]),
        # ---- Women's domestic leagues ----
        ("NWSL", 2013, "https://fbref.com/en/comps/182/history/NWSL-Seasons", ["NWSL"]),
        ("A-League Women", 2019, "https://fbref.com/en/comps/196/history/A-League-Women-Seasons", ["A-League-Women", "W-League"]),
        ("WSL", 2017, "https://fbref.com/en/comps/189/history/Womens-Super-League-Seasons", ["Womens-Super-League-1"]),
        ("D1 Feminine", 2018, "https://fbref.com/en/comps/193/history/Division-1-Feminine-Seasons", ["Division-1-Feminine"]),
        ("Womens Bundesliga", 2017, "https://fbref.com/en/comps/183/history/Frauen-Bundesliga-Seasons", ["Frauen-Bundesliga"]),
        ("Womens Serie A", 2019, "https://fbref.com/en/comps/208/history/Serie-A-Seasons", ["Serie-A"]),
        ("Liga F", 2023, "https://fbref.com/en/comps/230/history/Liga-F-Seasons", ["Liga-F"]),
        # ---- Women's domestic cups ----
        ("NWSL Challenge Cup", 2020, "https://fbref.com/en/comps/881/history/NWSL-Challenge-Cup-Seasons", ["NWSL-Challenge-Cup"]),
        ("NWSL Fall Series", 2020, "https://fbref.com/en/comps/884/history/NWSL-Fall-Series-Seasons", ["NWSL-Fall-Series"]),
    ]
    fbref = {
        comp: {"first valid year": first_year, "url": url, "finder": finder}
        for comp, first_year, url, finder in fbref_rows
    }

    understat = years_only({
        "EPL": 2015,
        "La Liga": 2015,
        "Bundesliga": 2015,
        "Serie A": 2015,
        "Ligue 1": 2015,
        "RFPL": 2015,
    })

    five_thirty_eight = years_only({
        "EPL": 2017,
        "La Liga": 2017,
        "Bundesliga": 2017,
        "Serie A": 2017,
        "Ligue 1": 2017,
    })

    # "SofaScore" ("USL League One", first valid year 2019) is intentionally left out for now.

    capology = years_only({
        "Bundesliga": 2014,
        "2.Bundesliga": 2020,
        "EPL": 2014,
        "EFL Championship": 2014,
        "Serie A": 2010,
        "Serie B": 2020,
        "La Liga": 2014,
        "La Liga 2": 2020,
        "Ligue 1": 2014,
        "Ligue 2": 2020,
        "Eredivisie": 2014,
        "Primeira Liga": 2014,
        "Scottish PL": 2020,
        "Super Lig": 2014,
        "Belgian 1st Division": 2014,
    })

    transfermarkt = years_only({
        "EPL": 1993,
        "EFL Championship": 2005,
        "EFL1": 2005,
        "EFL2": 2005,
        "Bundesliga": 1964,
        "2.Bundesliga": 1982,
        "Serie A": 1930,
        "Serie B": 1930,
        "La Liga": 1929,
        "La Liga 2": 1929,
        "Ligue 1": 1970,
        "Ligue 2": 1993,
        "Eredivisie": 1955,
        "Scottish PL": 2004,
        "Super Lig": 1960,
        "Jupiler Pro League": 1987,
        "Liga Nos": 1994,
        "Russian Premier League": 2011,
        "Brasileirao": 2001,
        "Argentina Liga Profesional": 2015,
        "MLS": 1996,
    })

    # Each Oddsportal row is (competition, url, first valid year, finder)
    oddsportal_rows = [
        ("EPL", "https://www.oddsportal.com/football/england/premier-league", 2004, "premier-league"),
        ("EFL Championship", "https://www.oddsportal.com/football/england/championship", 2004, "championship"),
        ("EFL League 1", "https://www.oddsportal.com/football/england/league-one", 2004, "league-one"),
        ("EFL League 2", "https://www.oddsportal.com/football/england/league-two", 2004, "league-two"),
        ("La Liga", "https://www.oddsportal.com/football/spain/laliga", 2004, "laliga"),
    ]
    oddsportal = {
        comp: {"url": url, "first valid year": first_year, "finder": finder}
        for comp, url, first_year, finder in oddsportal_rows
    }

    return {
        "All": {},
        "FBRef": fbref,
        "Understat": understat,
        "FiveThirtyEight": five_thirty_eight,
        "Capology": capology,
        "Transfermarkt": transfermarkt,
        "Oddsportal": oddsportal,
    }

In [4]:
def get_source_comp_info(year, league, source):
    """ Validates a (year, league, source) combination and returns the competition info.

    The table of sources and competitions comes from view_source_comp_info(). Invalid
    input is reported by raising; nothing is returned in that case.

    Args
    ----
    year : int or None
        Season year, checked against the competition's "first valid year". `None` is
        accepted only for Oddsportal, where it stands for the current season.
    league : str
        Competition name. Must be a key of the chosen source in view_source_comp_info().
    source : str
        The scraper to be checked (e.g. "FBRef", "Transfermarkt", etc.). Must be a
        top-level key of view_source_comp_info().
    Returns
    -------
    : dict
        The full nested dict from view_source_comp_info(). For Oddsportal "La Liga" with a
        year before 2017, its "url" and "finder" are replaced by the "primera-division"
        variants.
    Raises
    ------
    TypeError
        If source or league is not a string, or year has the wrong type.
    Exception
        If the source or league is unknown, or the year is before the competition's
        first valid year.
    """
    # Dict of data sources and leagues for each source
    source_comp_info = view_source_comp_info()

    # Check source
    if not isinstance(source, str):
        raise TypeError("Source must be a string.")
    if source not in source_comp_info:
        raise Exception(f"source exception {source}, {source_comp_info}")

    # Check league
    if not isinstance(league, str):
        raise TypeError("League must be a string.")
    if league not in source_comp_info[source]:
        raise Exception(f"league exception {league}, {source}, {source_comp_info}")

    # Check year. bool is a subclass of int but is never a meaningful year, so reject it.
    year_is_int = isinstance(year, int) and not isinstance(year, bool)
    first_valid_year = source_comp_info[source][league]["first valid year"]
    if source == "Oddsportal":
        if year is not None and not year_is_int:
            raise TypeError("For Oddsportal, the year must be an integer or `None` for the current season.")
        if year_is_int and year < first_valid_year:
            raise Exception(f"year exception {year}, {league}, {source}, {source_comp_info}")
    else:
        if not year_is_int:
            raise TypeError("Year must be an integer.")
        if year < first_valid_year:
            raise Exception(f"year exception {year}, {league}, {source_comp_info}")

    # Some source competition info is year-dependent. Handle that here.
    if source == "Oddsportal" and league == "La Liga" and year is not None and year < 2017:
        la_liga = source_comp_info["Oddsportal"]["La Liga"]
        la_liga["url"] = "https://www.oddsportal.com/football/spain/primera-division"
        la_liga["finder"] = "primera-division"

    return source_comp_info

In [5]:
def get_season_link(year, league):
    """ Finds the FBRef URL of one season of a competition.

    Downloads the competition's season-history page and walks its season header cells
    (`th` elements whose data-stat is "year" or "year_id"). The first cell that links to
    an href containing one of the competition's finders, and whose text is one of the
    wanted season labels, wins.

    Args
    ----
    year : int
        Season year. A cell matches when its text is exactly "<year>-<year+1>" or "<year>".
        NOTE(review): earlier documentation described `year` as the calendar year the
        season ends in (e.g. 2023 for 2022/23); that does not agree with the
        "<year>-<year+1>" label built here -- confirm which convention is intended.
    league : str
        Competition name; must be an FBRef key in view_source_comp_info().
    Returns
    -------
    : str
        URL to the FBRef page of the chosen league season
    Raises
    ------
    Exception
        If no cell on the history page matches (plus anything raised by
        get_source_comp_info for invalid input).
    """
    fbref_entry = get_source_comp_info(year, league, "FBRef")["FBRef"][league]
    history_url = fbref_entry["url"]
    finders = fbref_entry["finder"]

    # Load the competition's season-history page
    history_page = BeautifulSoup(requests_get(history_url).content, "html.parser")

    # Season labels for competitions spanning two calendar years and for single-year ones
    wanted_labels = (str(year) + '-' + str(year + 1), str(year))

    for header_cell in history_page.find_all("th", {"data-stat": ["year", "year_id"]}):
        anchor = header_cell.find("a")
        if not anchor:
            # Header cells without a link cannot point to a season page
            continue
        has_finder = any(f in anchor["href"] for f in finders)
        is_wanted_season = header_cell.getText() in wanted_labels
        if has_finder and is_wanted_season:
            return "https://fbref.com" + anchor["href"]

    raise Exception(f"season unavailable {year}, {league}, FBRef")

In [6]:
def get_match_links(year, league):
    """ Collects the FBRef match-page links of one season of a competition.

    Derives the season's schedule page URL from the season link, downloads it, and keeps
    every link found in a "score" table cell whose href contains one of the competition's
    finders.

    Args
    ----
    year : int
        Season year, passed unchanged to get_source_comp_info() and get_season_link()
    league : str
        Competition name; must be an FBRef key in view_source_comp_info().
    Returns
    -------
    : list or None
        Absolute FBRef URLs of the matches. `None` only if get_season_link() returns -1.
    Raises
    ------
    Exception
        If the schedule page has no score cell containing a link.
    """
    fbref_info = get_source_comp_info(year, league, 'FBRef')

    print(f'Gathering match links {year}, {league}')
    season_link = get_season_link(year, league)
    # Sentinel kept for compatibility; the get_season_link() in this notebook raises instead
    if season_link == -1:
        return None

    # Build the scores and fixtures URL: insert "/schedule/" before the last path segment
    # and swap that segment's final dash-separated word for "Score-and-Fixtures"
    base_path, _, season_slug = season_link.rpartition('/')
    schedule_slug = season_slug.rpartition('-')[0] + '-Score-and-Fixtures'
    fixtures_url = base_path + '/schedule/' + schedule_slug
    fixtures_page = BeautifulSoup(requests_get(fixtures_url).content, 'html.parser')

    # Gather the first href-carrying element of every score cell; none at all means the
    # page offers no match links
    scores_links = []
    for score_cell in fixtures_page.find_all("td", {"data-stat": "score"}):
        linked = score_cell.find(href=True)
        if linked:
            scores_links.append(linked)
    if len(scores_links) == 0:
        raise Exception(f"no links for {fixtures_url}, {year}, {league}")

    # Keep only the links that carry one of this competition's finders
    finders = fbref_info["FBRef"][league]["finder"]
    match_links = []
    for linked in scores_links:
        if linked and any(f in linked["href"] for f in finders):
            match_links.append("https://fbref.com" + linked["href"])

    return match_links

In [7]:
def stats_minute_adj(stats_df: pd.DataFrame, minute_column: tuple[str, str], events_df: pd.DataFrame | None, stoppage_total: int, gk: bool = False) -> pd.Series:
    """Adjust a stats table's minutes column and return it.

    Args
    ----
    stats_df : pd.DataFrame
        Stats table containing `minute_column`. It is addressed with tuple column keys such as
        ('minutes_played', ''), i.e. presumably two-level columns -- TODO confirm with caller.
    minute_column : tuple[str, str]
        Column key of the minutes column to adjust.
    events_df : pd.DataFrame or None
        Events table with 'event_team', 'event_type', 'player_id' and 'minutes_played'
        columns, or None when no events are available.
    stoppage_total : int
        Value added to the minutes of every row that gets no minutes from `events_df`.
    gk : bool
        When False, the last row's minutes are overwritten with the sum of all other rows.
    Returns
    -------
    : pd.Series
        The adjusted `minute_column`.

    Note: when `events_df` is None the adjustments are written into the caller's
    `stats_df`; otherwise they are applied to the merged copy created below.
    """
    if events_df is None:
        # No events: shift every row's minutes by the stoppage total
        stats_df[minute_column] = stoppage_total + stats_df[minute_column]
    else:
        # Keep events of the 'home' team that are neither yellow cards nor goals.
        # NOTE(review): 'home' is hard-coded here -- confirm how away-team tables are handled.
        event_minutes = events_df[(events_df['event_team'] == 'home') & ~(events_df['event_type'].isin(['yellow_card', 'goal']))]
        # Give the events two-level column keys (name, '') so they can be merged with stats_df
        event_minutes.columns = pd.MultiIndex.from_product([event_minutes.columns, ['']])
        # Left merge: rows without a matching event get a null ('minutes_played', '').
        # From here on `stats_df` refers to the merged frame, not the caller's object.
        stats_df = stats_df.merge(right=event_minutes[[('player_id', ''), ('minutes_played', '')]], on=('player_id',), how='left')
        # Rows with event minutes take that value directly ...
        stats_df.loc[stats_df[('minutes_played', '')].notnull(), minute_column] = stats_df.loc[stats_df[('minutes_played', '')].notnull(), ('minutes_played', '')]
        # ... all other rows are shifted by the stoppage total
        stats_df.loc[~stats_df[('minutes_played', '')].notnull(), minute_column] = stoppage_total + stats_df.loc[~stats_df[('minutes_played', '')].notnull(), minute_column]
    if not gk:
        # Overwrite the last row with the sum of the rows above it
        # (presumably the table's totals row -- TODO confirm)
        stats_df.loc[stats_df.index[-1], minute_column] = stats_df.loc[stats_df.index[:-1], minute_column].sum()
    else:
        pass
    return stats_df[minute_column]

In [8]:
def convert_age_to_days(stats_df: pd.DataFrame, age_column: tuple[str, str]) -> pd.Series:
    """Convert age strings to a number of days.

    Each non-null value of `age_column` is split on '-'. The first part is counted as
    365 days per unit (leap days are ignored); if there are exactly two parts, the second
    is added as extra days. Values with any other number of parts contribute only the
    first part.

    Args
    ----
    stats_df : pd.DataFrame
        Table holding the age column.
    age_column : tuple[str, str]
        Column key of the age column.
    Returns
    -------
    : pd.Series
        Integer day counts, indexed like the non-null rows of `age_column`. Rows with a
        null age are absent from the result. If every age is null, an empty int64 series
        is returned.
    """
    ages = stats_df[age_column]
    known_ages = ages[ages.notnull()]
    if known_ages.empty:
        # An all-null column may not have string dtype, and `.str` would raise on it
        return pd.Series(index=known_ages.index, dtype="int64")

    def _to_days(parts):
        days = int(parts[0]) * 365
        if len(parts) == 2:
            days += int(parts[1])
        return days

    return known_ages.str.split('-', regex=False).apply(_to_days)

In [80]:
# tzinfo of the local timezone of the machine running the notebook, captured once when this cell runs
current_timezone = datetime.now(timezone.utc).astimezone().tzinfo


def scrape_match(link, wait_time=0):
    """ Scrapes an FBRef match page.
    Args
    ----
    link : str
        URL to the FBRef match page
    Returns
    -------
    : Pandas DataFrame
        DataFrame containing most parts of the match page if they're available (e.g. formations, lineups, scores, \
        player stats, etc.). The fields that are available vary by competition and year.
    """
    response = requests_get(link, wait_time=wait_time)
    soup = BeautifulSoup(response.content, 'html.parser')
    # Matchweek/stage ==============================================================================================
    stage_el = list(soup.find('a', {'href': re.compile('-Stats')}, string=True).parents)[0]
    stage_text = stage_el.getText().split("(")[1].split(")")[0].strip()
    if "matchweek" in stage_text:
        stage = int(stage_text.lower().replace("matchweek", "").strip())
    else:
        stage = stage_text
    # Team names and ids ===========================================================================================
    scorebox = soup.find('div', {'class': 'scorebox'})
    try:
        match_cancelled = scorebox.find('a', href=re.compile('matchup')).find_next()
        if any(sub_st in match_cancelled.text for sub_st in ['*', 'cancel', 'poned']):
            print(f'match canceled, {link}')
            return None
    except Exception as err:
        raise Exception(f'Unexpected Error, {err}')

    data_points = scorebox.find_all('div', {'class': 'datapoint'})
    team_els = [
        el.find('a')
        for el
        in scorebox.find_all('strong')
        if el.find('a', href=True) is not None][:2]
    home_team_name = team_els[0].getText()
    home_team_id = team_els[0]['href'].split('/')[3]
    away_team_name = team_els[1].getText()
    away_team_id = team_els[1]['href'].split('/')[3]

    if len(data_points) >= 4:
        home_manager = data_points[0].find('strong', string='Manager').next_sibling[2:].replace(u'\xa0', u' ').strip()
        away_manager = data_points[2].find('strong', string='Manager').next_sibling[2:].replace(u'\xa0', u' ').strip()
        home_captain = data_points[1].find('a', href=True).string.replace(u'\xa0', u' ').strip()
        away_captain = data_points[3].find('a', href=True).string.replace(u'\xa0', u' ').strip()
    elif len(data_points) == 2:
        home_manager = data_points[0].find('strong', string='Manager').next_sibling[2:].replace(u'\xa0', u' ').strip()
        away_manager = data_points[1].find('strong', string='Manager').next_sibling[2:].replace(u'\xa0', u' ').strip()
        home_captain = None
        away_captain = None
    elif len(data_points) == 0:
        return None
    else:
        capt_mang_elemnts = scorebox.find_all('div', {'class': 'datapoint'}, 'strong')
        if 'Captain' in capt_mang_elemnts[0].get_text() and 'Manager' in capt_mang_elemnts[1].get_text() and 'Captain' in capt_mang_elemnts[2].get_text():
            home_manager = None
            home_captain = capt_mang_elemnts[0].get_text().split(':')[1].replace(u'\xa0', u' ').strip()
            away_manager = capt_mang_elemnts[1].get_text().split(':')[1].replace(u'\xa0', u' ').strip()
            away_captain = capt_mang_elemnts[2].get_text().split(':')[1].replace(u'\xa0', u' ').strip()

        elif 'Manager' in capt_mang_elemnts[0].get_text() and 'Manager' in capt_mang_elemnts[1].get_text() and 'Captain' in capt_mang_elemnts[2].get_text():
            home_manager = capt_mang_elemnts[0].get_text().split(':')[1].replace(u'\xa0', u' ').strip()
            home_captain = None
            away_manager = capt_mang_elemnts[1].get_text().split(':')[1].replace(u'\xa0', u' ').strip()
            away_captain = capt_mang_elemnts[2].get_text().split(':')[1].replace(u'\xa0', u' ').strip()

        elif 'Manager' in capt_mang_elemnts[0].get_text() and 'Captain' in capt_mang_elemnts[1].get_text() and 'Captain' in capt_mang_elemnts[2].get_text():
            home_manager = capt_mang_elemnts[0].get_text().split(':')[1].replace(u'\xa0', u' ').strip()
            home_captain = capt_mang_elemnts[1].get_text().split(':')[1].replace(u'\xa0', u' ').strip()
            away_manager = None
            away_captain = capt_mang_elemnts[2].get_text().split(':')[1].replace(u'\xa0', u' ').strip()

        elif 'Manager' in capt_mang_elemnts[0].get_text() and 'Captain' in capt_mang_elemnts[1].get_text() and 'Manager' in capt_mang_elemnts[2].get_text():
            home_manager = capt_mang_elemnts[0].get_text().split(':')[1].replace(u'\xa0', u' ').strip()
            home_captain = capt_mang_elemnts[1].get_text().split(':')[1].replace(u'\xa0', u' ').strip()
            away_manager = capt_mang_elemnts[2].get_text().split(':')[1].replace(u'\xa0', u' ').strip()
            away_captain = None

        else:
            raise Exception('Unexpected page layout')

    attendance = scorebox.find('small', string='Attendance')
    if attendance is not None:
        attendance = attendance.findNext().text.strip()
    try:
        venue = scorebox.find('small', string='Venue').findNext().text.strip()
    except AttributeError:
        venue = None
    officials = re.split(" . ", scorebox.find('small', string='Officials').findNext().text.replace(u'\xa0', u' ').strip())

    # Scores =======================================================================================================
    scores = scorebox.find_all('div', {'class': 'score'})

    # Formations ===================================================================================================
    lineup_tags = [tag.find('table') for tag in soup.find_all('div', {'class': 'lineup'})]

    # embedd visual info because formation table is organized by jersey number
    home_pos_soup = soup.find_all('div', {'class': 'poptip a'})
    home_pos_df = pd.DataFrame(columns=['player', 'top', 'left'], index=range(len(home_pos_soup)))
    for i, player in enumerate(home_pos_soup):
        home_pos_df.loc[i, 'player'] = player.attrs['title']
        embedded_pos = re.findall('calc\((\d*.\d* - \d*)', player.attrs['style'].replace('%', ''))
        home_pos_df.loc[i, 'top'] = embedded_pos[0]
        # x_pos will be a left calc for home team, right calc for away team
        home_pos_df.loc[i, 'left'] = embedded_pos[1]

    away_pos_soup = soup.find_all('div', {'class': 'poptip b'})
    away_pos_df = pd.DataFrame(columns=['player', 'top', 'right'], index=range(len(away_pos_soup)))
    for i, player in enumerate(away_pos_soup):
        away_pos_df.loc[i, 'player'] = player.attrs['title']
        embedded_pos = re.findall('calc\((\d*.\d* - \d*)', player.attrs['style'].replace('%', ''))
        away_pos_df.loc[i, 'top'] = embedded_pos[0]
        # x_pos will be a left calc for home team, right calc for away team
        away_pos_df.loc[i, 'right'] = embedded_pos[1]

    # too lazy to refactor loop below so just make a dict..
    form_pos_dict = {'Home': home_pos_df, 'Away': away_pos_df}

    # Player stats =================================================================================================
    # Use table ID's to find the appropriate table. More flexible than xpath
    player_stats = dict()
    for i, (team, team_id) in enumerate([('Home', home_team_id), ('Away', away_team_id)]):
        summary_tag = soup.find_all('table', {'id': re.compile(f'stats_{team_id}_summary')})
        assert len(summary_tag) < 2
        summary_df = pd.read_html(str(summary_tag[0]))[0] if len(summary_tag) == 1 else None
        gk_tag = soup.find_all('table', {'id': re.compile(f'keeper_stats_{team_id}')})
        assert len(gk_tag) < 2
        gk_df = pd.read_html(str(gk_tag[0]))[0] if len(gk_tag) == 1 else None
        passing_tag = soup.find_all('table', {'id': re.compile(f'stats_{team_id}_passing$')})
        assert len(passing_tag) < 2
        passing_df = pd.read_html(str(passing_tag[0]))[0] if len(passing_tag) == 1 else None
        pass_types_tag = soup.find_all('table', {'id': re.compile(f'stats_{team_id}_passing_types')})
        assert len(pass_types_tag) < 2
        pass_types_df = pd.read_html(str(pass_types_tag[0]))[0] if len(pass_types_tag) == 1 else None
        defense_tag = soup.find_all('table', {'id': re.compile(f'stats_{team_id}_defense')})
        assert len(defense_tag) < 2
        defense_df = pd.read_html(str(defense_tag[0]))[0] if len(defense_tag) == 1 else None
        possession_tag = soup.find_all('table', {'id': re.compile(f'stats_{team_id}_possession')})
        assert len(possession_tag) < 2
        possession_df = pd.read_html(str(possession_tag[0]))[0] if len(possession_tag) == 1 else None
        misc_tag = soup.find_all('table', {'id': re.compile(f'stats_{team_id}_misc')})
        assert len(misc_tag) < 2
        misc_df = pd.read_html(str(misc_tag[0]))[0] if len(misc_tag) == 1 else None
        lineup_df = pd.read_html(str(lineup_tags[i]))[0] if len(lineup_tags) != 0 else None
        formation = (
            lineup_df.columns[0].split('(')[-1].replace(')', '').strip()
            if lineup_df is not None else None
        )

        # Field player_id's for the stats tables -------------------------------------------------------------------
        # Note: if a coach gets a yellow/red card, they appear in the player stats tables, in their own row, at the
        # bottom.

        if summary_df is not None:
            player_ids = list()
            # Iterate across all els that are player/coach names in the summary stats table
            for tag in summary_tag[0].find_all('th', {'data-stat': 'player', 'scope': 'row', 'class': 'left'}):
                if tag.find('a'):
                    # if th el has an a subel, it should contain an href link to the player
                    player_id = tag.find('a')['href'].split('/')[3]
                else:
                    # coaches and the summary row have now a subel (and no player_id)
                    player_id = ''
                player_ids.append(player_id)
            summary_df['player_id'] = player_ids
            if passing_df is not None:
                passing_df['player_id'] = player_ids
            if pass_types_df is not None:
                pass_types_df['player_id'] = player_ids
            if defense_df is not None:
                defense_df['player_id'] = player_ids
            if possession_df is not None:
                possession_df['player_id'] = player_ids
            if misc_df is not None:
                misc_df['player_id'] = player_ids

        # GK ID's --------------------------------------------------------------------------------------------------
        if gk_df is not None:
            gk_ids = [
                tag.find('a')['href'].split('/')[3]
                for tag
                in gk_tag[0].find_all('th', {'data-stat': 'player'})
                if tag.find('a')
            ]
            gk_df['player_id'] = gk_ids

        # Build player stats dict ----------------------------------------------------------------------------------
        # This will be turned into a Series and then put into the match dataframe
        player_stats[team] = {
            'starting_lineup': form_pos_dict[team],
            'bench': lineup_df.iloc[12:, 1] if lineup_df is not None else None,
            'summary': summary_df,
            'gk': gk_df,
            'passing': passing_df,
            'pass_types': pass_types_df,
            'defense': defense_df,
            'possession': possession_df,
            'misc': misc_df,
            'formation': formation
        }

    # Shots ========================================================================================================
    both_shots = soup.find_all('table', {'id': 'shots_all'})
    if len(both_shots) == 1:
        both_shots = pd.read_html(str(both_shots[0]))[0]
        both_shots = both_shots[~both_shots.isna().all(axis=1)]
        both_shots_df = pd.DataFrame(both_shots)
    else:
        both_shots_df = None

    # Expected stats flag ==========================================================================================
    expected = 'Expected' in player_stats['Home']['summary'].columns.get_level_values(0)

    # Venue Info  =================================================================================================
    venuetime = soup.find('span', {'class': 'venuetime'})
    # The website returns the timestamp in the current timezone. Extract that rather than the venue time, because weather information is keyed by UTC time and it's easier to know the machine's timezone than the match's timezone.
    if venuetime is not None:
        timestamp = int(venuetime.get('data-venue-epoch'))  # type: ignore
        utc_match_date_time = datetime.fromtimestamp(timestamp, tz=current_timezone).astimezone(timezone.utc)
        local_match_date = datetime.strptime(venuetime.get('data-venue-date'), '%Y-%m-%d').date()  # type: ignore
    else:
        local_match_date = datetime.strptime(
            str(soup.find('h1'))
            .split('<br/>')[0]
            .split('–')[-1]  # not a normal dash
            .replace('</h1>', '')
            .split('(')[0]
            .strip(),
            '%A %B %d, %Y').date()
        utc_match_date_time = None
    # Generic Match Stats =========================================================================================
    team_stats_div = soup.find('div', {'id': 'team_stats'})

    stat_info = team_stats_div.find_all('th', {'colspan': '2'})
    match_stats_categories = [col.text.lower().replace(' ', '_') for col in stat_info]

    match_stats = {}
    for i, stat in enumerate(match_stats_categories):
        if stat == 'possession':
            match_stats[f'home_{stat}_frac'] = int(stat_info[i].findNext().find('strong').text.replace('%', '')) / 100
            match_stats[f'away_{stat}_frac'] = int(stat_info[i].findNext().find('strong').findNext('strong').text.replace('%', '')) / 100
        elif stat == 'cards':
            pass
        else:
            # sometimes the fraction can be blank if it's 0 of 0 (saves especially)
            match_stats[f'home_{stat}'] = int(re.split('of (\d*)', stat_info[i].findNext().find('strong').findPrevious().text)[1])
            match_stats[f'away_{stat}'] = int(re.split('of (\d*)', stat_info[i].findNext().find('strong').findNext('strong').findPrevious().text)[1])
            frac_text = stat_info[i].findNext().find('strong').text.replace('%', '')
            match_stats[f'home_{stat}_frac'] = int(frac_text) / 100 if frac_text != '' else None
            frac_text = stat_info[i].findNext().find('strong').findNext('strong').text.replace('%', '')
            match_stats[f'away_{stat}_frac'] = int(frac_text) / 100 if frac_text != '' else None

    generic_stats_df = pd.DataFrame(data=match_stats, index=[0])

    # Match Events  ===============================================================================================
    all_match_events = soup.find_all('div', {'class': ['event a', 'event b']})
    match_events = {'event_team': [], 'event_time': [], 'home_event_score': [], 'away_event_score': [],
                    'event_type': [], 'player_id': [], 'player_name': [], }
    for event in all_match_events:
        event_header = event.find('div')
        event_info = event_header.find_next('div', {'class': 'event_icon'})
        event_str = event_info.attrs['class'][1].replace('_in', '')
        if 'penalty_shootout' not in event_str:
            match_events['event_team'].append('home' if event.attrs['class'][1] == 'a' else 'away')
            match_events['event_time'].append(re.split('’', event_header.text)[0].replace('\n', '').replace('\t', '').replace('\xa0', ''))
            match_events['home_event_score'].append(re.split(':', re.split('’', event_header.text)[1].replace('\n', '').replace('\t', ''))[0])
            match_events['away_event_score'].append(re.split(':', re.split('’', event_header.text)[1].replace('\n', '').replace('\t', ''))[1])
            match_events['event_type'].append(event_str)
            try:
                match_events['player_id'].append(event_info.find_next('a')['href'].split('/')[3])
                match_events['player_name'].append(event_info.find_next('a')['href'].split('/')[4])
            except KeyError:
                match_events['player_id'].append('manager')
                match_events['player_name'].append('manager')
            if event_str == 'substitute':
                match_events['event_team'].append(match_events['event_team'][-1])
                match_events['event_time'].append(match_events['event_time'][-1])
                match_events['home_event_score'].append(match_events['home_event_score'][-1])
                match_events['away_event_score'].append(match_events['away_event_score'][-1])
                try:
                    sub_info = event_info.find_next_sibling()
                    out_info = sub_info.find('small')
                    match_events['player_id'].append(out_info.find('a')['href'].split('/')[3])
                    match_events['player_name'].append(out_info.find('a')['href'].split('/')[4])
                    match_events['event_type'][-1] += '_in'
                    match_events['event_type'].append('substitute_out')
                except (KeyError, TypeError, AttributeError):
                    match_events['event_type'][-1] = 'substitute_out'
                    match_events['player_id'].append(None)
                    match_events['player_name'].append(None)
                    match_events['event_type'].append('irregular_sub_data')
    if len(match_events['event_time']) > 0:
        events_df = pd.DataFrame(match_events, index=range(len(match_events['event_time'])))

        events_df['event_time'] = events_df['event_time'].str.split('+', regex=False).apply(lambda x: [int(el) for el in x])
        events_first_half_stoppage = events_df['event_time'].apply(lambda x: x[1] if len(x) == 2 and x[0] == 45 else 0).max()
        events_second_half_stoppage = events_df['event_time'].apply(lambda x: x[1] if len(x) == 2 and x[0] == 90 else 0).max()
        events_first_et_stoppage = events_df['event_time'].apply(lambda x: x[1] if len(x) == 2 and x[0] == 105 else 0).max()
        events_second_et_stoppage = events_df['event_time'].apply(lambda x: x[1] if len(x) == 2 and x[0] == 120 else 0).max()
    else:
        events_first_half_stoppage, events_second_half_stoppage, events_first_et_stoppage, events_second_et_stoppage = 0, 0, 0, 0
        events_df = None

    if both_shots_df is not None:
        # Adjust match time for stoppage time (not exact but best I can do with data on FBRef), sometimes auto_interpreted as other dtype so force as str
        both_shots_df[('Unnamed: 0_level_0', 'Minute')] = both_shots_df[('Unnamed: 0_level_0', 'Minute')].astype(str).str.split('+', regex=False).apply(lambda x: [int(el.split('.')[0]) for el in x])
        both_shots_df['minute'] = both_shots_df[('Unnamed: 0_level_0', 'Minute')].apply(sum)
        # both_shots['minute'] = sum([int(minute) for minute in both_shots['minute']])

        shots_first_half_stoppage = both_shots_df[('Unnamed: 0_level_0', 'Minute')].apply(lambda x: x[1] if len(x) == 2 and x[0] == 45 else 0).max()
        shots_second_half_stoppage = both_shots_df[('Unnamed: 0_level_0', 'Minute')].apply(lambda x: x[1] if len(x) == 2 and x[0] == 90 else 0).max()
        shots_first_et_stoppage = both_shots_df[('Unnamed: 0_level_0', 'Minute')].apply(lambda x: x[1] if len(x) == 2 and x[0] == 105 else 0).max()
        shots_second_et_stoppage = both_shots_df[('Unnamed: 0_level_0', 'Minute')].apply(lambda x: x[1] if len(x) == 2 and x[0] == 120 else 0).max()

    else:
        shots_first_half_stoppage, shots_second_half_stoppage, shots_first_et_stoppage, shots_second_et_stoppage = 0, 0, 0, 0

    first_half_stoppage = max(events_first_half_stoppage, shots_first_half_stoppage)
    second_half_stoppage = max(events_second_half_stoppage, shots_second_half_stoppage)
    first_et_stoppage = max(events_first_et_stoppage, shots_first_et_stoppage)
    second_et_stoppage = max(events_second_et_stoppage, shots_second_et_stoppage)

    if both_shots_df is not None:
        both_shots_df['minute'] = both_shots_df[('Unnamed: 0_level_0', 'Minute')].apply(lambda x:
                                                                                        # second half et in stoppage time
                                                                                        x[0] + x[1] + first_half_stoppage + second_half_stoppage + 21 + first_et_stoppage if x[0] > 105 and len(x) == 2
                                                                                        # second half et in normal time
                                                                                        else x[0] + first_half_stoppage + second_half_stoppage + 21 + first_et_stoppage if x[0] > 105
                                                                                        # first half et in stoppage time
                                                                                        else x[0] + x[1] + first_half_stoppage + second_half_stoppage + 20 if x[0] > 90 and len(x) == 2
                                                                                        else x[0] + first_half_stoppage + second_half_stoppage + 20 if x[0] > 90  # first half et in normal time
                                                                                        else x[0] + x[1] + 15 + first_half_stoppage if x[0] > 45 and len(x) == 2  # second half in stoppage time
                                                                                        else x[0] + 15 + first_half_stoppage if x[0] > 45  # second half in normal time
                                                                                        else x[0] + x[1] if len(x) == 2  # first half in stoppage time
                                                                                        else x[0])  # first half
    if events_df is not None:
        events_df['minute'] = events_df['event_time'].apply(lambda x:
                                                            # second half et in stoppage time
                                                            x[0] + x[1] + first_half_stoppage + second_half_stoppage + 21 + first_et_stoppage if x[0] > 105 and len(x) == 2
                                                            else x[0] + first_half_stoppage + second_half_stoppage + 21 + first_et_stoppage if x[0] > 105  # second half et in normal time
                                                            else x[0] + x[1] + first_half_stoppage + second_half_stoppage + 20 if x[0] > 90 and len(x) == 2  # first half et in stoppage time
                                                            else x[0] + first_half_stoppage + second_half_stoppage + 20 if x[0] > 90  # first half et in normal time
                                                            else x[0] + x[1] + 15 + first_half_stoppage if x[0] > 45 and len(x) == 2  # second half in stoppage time
                                                            else x[0] + 15 + first_half_stoppage if x[0] > 45  # second half in normal time
                                                            else x[0] + x[1] if len(x) == 2  # first half in stoppage time
                                                            else x[0])  # first half
        events_df['minutes_played'] = events_df['event_time'].apply(lambda x:
                                                                    # second half et in stoppage time
                                                                    x[0] + x[1] + first_half_stoppage + second_half_stoppage + first_et_stoppage if x[0] > 105 and len(x) == 2
                                                                    else x[0] + first_half_stoppage + second_half_stoppage + first_et_stoppage if x[0] > 105  # second half et in normal time
                                                                    else x[0] + x[1] + first_half_stoppage + second_half_stoppage if x[0] > 90 and len(x) == 2  # first half et in stoppage time
                                                                    else x[0] + first_half_stoppage + second_half_stoppage if x[0] > 90  # first half et in normal time
                                                                    else x[0] + x[1] + first_half_stoppage if x[0] > 45 and len(x) == 2  # second half in stoppage time
                                                                    else x[0] + first_half_stoppage if x[0] > 45  # second half in normal time
                                                                    else x[0] + x[1] if len(x) == 2  # first half in stoppage time
                                                                    else x[0])  # first half
        events_df['minutes_played_backup'] = events_df['minutes_played'].copy()
        events_df.loc[events_df['event_type'] == 'substitute_in', 'minutes_played'] = 90 + first_half_stoppage + second_half_stoppage - events_df['minutes_played']
        if any(events_df['minutes_played'] < 0):
            events_df.loc[events_df['event_type'] == 'substitute_in', 'minutes_played'] = 120 + first_half_stoppage + \
                second_half_stoppage + first_et_stoppage + second_et_stoppage - events_df['minutes_played_backup']
        events_df.drop(columns='minutes_played_backup', inplace=True)

    # some older matches are partially missing gk tables
    if player_stats['Home']['gk'] is not None:
        player_stats['Home']['gk'][('Unnamed: 3_level_0', 'Min')] = stats_minute_adj(player_stats['Home']['gk'],
                                                                                     ('Unnamed: 3_level_0', 'Min'), events_df, first_half_stoppage+second_half_stoppage, gk=True)
        player_stats['Home']['gk'][('Unnamed: 2_level_0', 'Age')] = convert_age_to_days(player_stats['Home']['gk'], ('Unnamed: 2_level_0', 'Age'))

    if player_stats['Away']['gk'] is not None:
        player_stats['Away']['gk'][('Unnamed: 3_level_0', 'Min')] = stats_minute_adj(player_stats['Away']['gk'],
                                                                                     ('Unnamed: 3_level_0', 'Min'), events_df, first_half_stoppage+second_half_stoppage, gk=True)
        player_stats['Away']['gk'][('Unnamed: 2_level_0', 'Age')] = convert_age_to_days(player_stats['Away']['gk'], ('Unnamed: 2_level_0', 'Age'))

    home_minutes_series = stats_minute_adj(player_stats['Home']['summary'], ('Unnamed: 5_level_0', 'Min'), events_df, first_half_stoppage+second_half_stoppage)
    away_minutes_series = stats_minute_adj(player_stats['Away']['summary'], ('Unnamed: 5_level_0', 'Min'), events_df, first_half_stoppage+second_half_stoppage)
    home_age_series = convert_age_to_days(player_stats['Home']['summary'], ('Unnamed: 4_level_0', 'Age'))
    away_age_series = convert_age_to_days(player_stats['Away']['summary'], ('Unnamed: 4_level_0', 'Age'))

    for df_key in ['summary', 'passing', 'pass_types', 'defense', 'possession', 'misc']:
        if player_stats['Home'][df_key] is not None:
            player_stats['Home'][df_key][('Unnamed: 5_level_0', 'Min')] = home_minutes_series
            player_stats['Away'][df_key][('Unnamed: 5_level_0', 'Min')] = away_minutes_series
            player_stats['Home'][df_key][('Unnamed: 4_level_0', 'Age')] = home_age_series
            player_stats['Away'][df_key][('Unnamed: 4_level_0', 'Age')] = away_age_series

    if both_shots_df is not None:
        both_shots_df = both_shots_df.drop(columns=('Unnamed: 0_level_0', 'Minute')).sort_values('minute').reset_index(drop=True)
    if events_df is not None:
        events_df = events_df.drop(columns='event_time').sort_values('minute').reset_index(drop=True)

    # Build match series ===========================================================================================
    match = pd.Series(dtype=object)
    match['link'] = link
    match['local_date'] = local_match_date
    match['utc_datetime'] = utc_match_date_time
    match['stage'] = stage
    match['home_team'] = home_team_name
    match['away_team'] = away_team_name
    match['home_team_id'] = home_team_id
    match['away_team_id'] = away_team_id
    match['home_manager'] = home_manager
    match['away_manager'] = away_manager
    match['home_captain'] = home_captain
    match['away_captain'] = away_captain
    match['attendance'] = attendance
    match['venue'] = venue
    match['officials'] = officials
    match['home_formation'] = player_stats['Home']['formation']
    match['away_formation'] = player_stats['Away']['formation']
    match['first_half_stoppage'] = first_half_stoppage
    match['second_half_stoppage'] = second_half_stoppage

    match['home_goals'] = int(scores[0].getText()) if scores[0].getText().isdecimal() else None
    match['away_goals'] = int(scores[1].getText()) if scores[1].getText().isdecimal() else None
    match['home_ast'] = player_stats['Home']['summary'][('Performance', 'Ast')].values[-1]
    match['away_ast'] = player_stats['Away']['summary'][('Performance', 'Ast')].values[-1]
    match['home_xG'] = player_stats['Home']['summary'][('Expected', 'xG')].values[-1] if expected else None
    match['away_xG'] = player_stats['Away']['summary'][('Expected', 'xG')].values[-1] if expected else None
    match['home_npxG'] = player_stats['Home']['summary'][('Expected', 'npxG')].values[-1] if expected else None
    match['away_npxG'] = player_stats['Away']['summary'][('Expected', 'npxG')].values[-1] if expected else None
    match['home_xAG'] = player_stats['Home']['summary'][('Expected', 'xAG')].values[-1] if expected else None
    match['away_xAG'] = player_stats['Away']['summary'][('Expected', 'xAG')].values[-1] if expected else None
    match['match_events'] = events_df
    match['overall_stats'] = generic_stats_df
    match['home_player_stats'] = pd.Series(player_stats['Home']).to_frame().T
    match['away_player_stats'] = pd.Series(player_stats['Away']).to_frame().T
    match['shots'] = both_shots_df

    match = match.to_frame().T  # series to dataframe

    return match

TODO: still need to do the Champions League, Europa League, Europa Conference League, World Cup, and Euros (adapt for extra time and find odds info).

In [100]:
# Load the log of failed scrape attempts and keep the distinct rows whose `link`
# holds the '5x fail, skip' marker.
failed_attempts_df = pd.read_parquet('failed_scrape_attempts.parquet')
five_x_fails = (
    failed_attempts_df
    .loc[failed_attempts_df['link'].eq('5x fail, skip')]
    .drop_duplicates()
)

In [11]:
# failed_attempts_df = pd.read_parquet('failed_scrape_attempts.parquet')
# five_x_fails = failed_attempts_df[failed_attempts_df['link'] == '5x fail, skip'].drop_duplicates()

# failed_attempts = {'league': [], 'year': [], 'link': []}
# for league in five_x_fails['league']:
#     current_league_matches = pd.DataFrame()
#     for year in range(2012, 2024):
#         try:
#             league_year_match_links = get_match_links(year, league)
#             league_year_match_links = pd.Series(league_year_match_links).drop_duplicates().to_list()
#         except Exception as ex:
#             except_str = str(ex)
#             except_str = except_str.split(',')
#             year = except_str[1].strip()
#             league = except_str[2].strip()
#             failed_attempts['league'].append(league)
#             failed_attempts['year'].append(year)
#             failed_attempts['link'].append('no links for this season + year')
#             league_year_match_links = []
#         current_league_year_matches = pd.DataFrame()
#         repeat_fail_counts = 0
#         print(f'starting {league}, {year}')
#         last_fail_i = 0
#         total_matches = len(league_year_match_links)
#         for i, link in enumerate(league_year_match_links):
#             try:
#                 # clear console, can't use fancy sys.out commands like "\033[K" because they don't work in jupyter notebook
#                 print("                                                                                             ", end="\r")
#                 print(f"scraping match, progress = {i+1}/{total_matches}", end="\r")
#                 start_time = perf_counter()
#                 one_match = scrape_match(link)
#                 parse_time = perf_counter()-start_time
#                 if parse_time < 4:
#                     time.sleep(4-parse_time)
#                 current_league_year_matches = pd.concat([current_league_year_matches, one_match])
#             except Exception as err:
#                 print(f'Exception {err}')
#                 if i == last_fail_i + 1:
#                     repeat_fail_counts += 1
#                 last_fail_i = i
#                 if repeat_fail_counts < 5:
#                     print(f'failed attempt {league}, {year}, {link}')
#                     failed_attempts['league'].append(league)
#                     failed_attempts['year'].append(year)
#                     failed_attempts['link'].append(link)
#                 else:
#                     print(f'failed 5 times, skipping {league}, {year}')
#                     failed_attempts['league'].append(league)
#                     failed_attempts['year'].append(year)
#                     failed_attempts['link'].append('5x fail, skip')
#                     break
#                 time.sleep(6)
#                 continue
#         if current_league_year_matches.shape[0] > 0:
#             print(f"finished with {year} for {league}                           ")
#             current_league_year_matches['league'] = league
#             current_league_year_matches['year'] = year
#             current_league_matches = pd.concat([current_league_matches, current_league_year_matches])
#             current_league_matches.reset_index(drop=True).to_pickle(f'C:\\Users\\Alec\\Documents\\Python\\soccer-ai\\data_files\\matches\\missing_{league}_progress_FBRef_scrape.pkl')

#     if current_league_matches.shape[0] > 0:
#         current_league_matches.reset_index(drop=True).to_pickle(f'C:\\Users\\Alec\\Documents\\Python\\soccer-ai\\data_files\\matches\\missing_{league}_FBRef_scrape.pkl')

# if os.path.exists('C:\\Users\\Alec\\Documents\\Python\\soccer-ai\\data_files\\matches\\failed_scrape_attempts.parquet'):
#     previously_failed_attempts = pd.read_parquet('C:\\Users\\Alec\\Documents\\Python\\soccer-ai\\data_files\\matches\\failed_scrape_attempts.parquet')
# else:
#     previously_failed_attempts = pd.DataFrame()

# failed_attempts_df = pd.DataFrame(failed_attempts, index=range(len(failed_attempts['league'])))

# final = pd.concat([failed_attempts_df, previously_failed_attempts], ignore_index=True)
# final[final.columns] = final[final.columns].astype(str)
# final.to_parquet(f'C:\\Users\\Alec\\Documents\\Python\\soccer-ai\\data_files\\matches\\failed_scrape_attempts.parquet')

In [10]:
# Rebind `five_x_fails` to the contents of 'missing_before_retry.parquet'
# (the path is relative to the current working directory).
five_x_fails = pd.read_parquet('missing_before_retry.parquet')

In [11]:
# Order the rows by league then year, drop exact duplicate rows and renumber the index from 0.
five_x_fails = (
    five_x_fails
    .sort_values(by=['league', 'year'])
    .drop_duplicates()
    .reset_index(drop=True)
)

In [126]:
# Row count per (year, league) in the scraped-match frame, listed by league then year.
(
    five_x_fail_matches[['year', 'league', 'stage']]
    .groupby(['year', 'league'], as_index=False)
    .size()
    .sort_values(by=['league', 'year'])
    .reset_index(drop=True)
)

Unnamed: 0,year,league,size
0,2012,Bundesliga,586
1,2013,Bundesliga,892
2,2018,Bundesliga 2,1198
3,2017,EFL Championship,1987
4,2019,Eredivisie,4698
5,2019,La Liga,2367
6,2019,Ligue 1,3547
7,2019,Ligue 2,2927
8,2012,Serie A,3708
9,2013,Serie A,3708


In [138]:
# Show the column labels of the scraped-match frame.
five_x_fail_matches.columns

Index(['link', 'local_date', 'utc_datetime', 'stage', 'home_team', 'away_team',
       'home_team_id', 'away_team_id', 'home_manager', 'away_manager',
       'home_captain', 'away_captain', 'attendance', 'venue', 'officials',
       'home_formation', 'away_formation', 'first_half_stoppage',
       'second_half_stoppage', 'home_goals', 'away_goals', 'home_ast',
       'away_ast', 'home_xG', 'away_xG', 'home_npxG', 'away_npxG', 'home_xAG',
       'away_xAG', 'match_events', 'overall_stats', 'home_player_stats',
       'away_player_stats', 'shots', 'league', 'year'],
      dtype='object')

In [145]:
# Narrow the scraped matches to the rows labelled year 2018 / league 'Bundesliga 2', then
# display every row that shares its (home_team, away_team, stage) combination with at least
# one other row, i.e. fixtures that appear more than once under that label.
# five_x_fail_matches[(five_x_fail_matches['year']==2012)&(five_x_fail_matches['league']=='Bundesliga')]['link'].value_counts().sum()
g = five_x_fail_matches.loc[
    five_x_fail_matches['year'].eq(2018) & five_x_fail_matches['league'].eq('Bundesliga 2')
]
g.loc[g.duplicated(subset=['home_team', 'away_team', 'stage'], keep=False)]
# g

Unnamed: 0,link,local_date,utc_datetime,stage,home_team,away_team,home_team_id,away_team_id,home_manager,away_manager,home_captain,away_captain,attendance,venue,officials,home_formation,away_formation,first_half_stoppage,second_half_stoppage,home_goals,away_goals,home_ast,away_ast,home_xG,away_xG,home_npxG,away_npxG,home_xAG,away_xAG,match_events,overall_stats,home_player_stats,away_player_stats,shots,league,year
403,https://fbref.com/en/matches/b93a4ce2/Greuther...,2012-10-06,,Matchweek 7,Greuther Fürth,Hamburger SV,12192a4c,26790c6a,Mike Büskens,Thorsten Fink,,,,,[Thorsten Kinhöfer (Referee)],4-1-2-1-2,4-2-3-1,0,0,0,1,,,,,,,,,event_team home_event_score away_event_scor...,Empty DataFrame Columns: [] Index: [0],starting_...,starting_...,,Bundesliga 2,2018
416,https://fbref.com/en/matches/b88afc09/Hamburge...,2013-03-02,,Matchweek 24,Hamburger SV,Greuther Fürth,26790c6a,12192a4c,Thorsten Fink,Ludwig Preis,,,,,[Daniel Siebert (Referee)],4-1-2-1-2,4-1-4-1,0,0,1,1,,,,,,,,,event_team home_event_score away_event_scor...,Empty DataFrame Columns: [] Index: [0],starting_...,starting_...,,Bundesliga 2,2018
984,https://fbref.com/en/matches/afa3788d/Hamburge...,2019-03-04,2019-03-04 19:30:00+00:00,Matchweek 24,Hamburger SV,Greuther Fürth,26790c6a,12192a4c,Hannes Wolf,Stefan Leitl,Aaron Hunt,Marco Caligiuri,36560.0,"Volksparkstadion, Hamburg","[Christian Dingert (Referee), Marcel Pelgrim (...",4-1-4-1,4-1-4-1,0,4,1,0,1.0,0.0,1.2,0.7,1.2,0.7,1.2,0.4,event_team home_event_score away_event_scor...,home_possession_frac away_possession_frac ...,starting_...,starting_...,Unnamed: 1_level_0 Unnamed: 2_level_0 U...,Bundesliga 2,2018
1192,https://fbref.com/en/matches/d2f864dd/Greuther...,2018-09-27,2018-09-27 18:30:00+00:00,Matchweek 7,Greuther Fürth,Hamburger SV,12192a4c,26790c6a,Damir Burić,Christian Titz,Sascha Burchert,Aaron Hunt,14965.0,"Sportpark Ronhof Thomas Sommer, Fürth","[Sascha Stegemann (Referee), Robert Wessel (AR...",4-4-2,4-2-3-1,0,2,0,0,0.0,0.0,0.4,1.2,0.4,1.2,0.2,1.1,event_team home_event_score away_event_scor...,home_possession_frac away_possession_frac ...,starting_...,starting_...,Unnamed: 1_level_0 Unnamed: 2_level_0 U...,Bundesliga 2,2018


In [135]:
# Gather the match links for a single season; passing them through set() removes
# duplicates (the resulting list order is arbitrary).
year, league = 2018, 'Bundesliga 2'
league_year_match_links = list(set(get_match_links(year, league)))

Gathering match links 2018, Bundesliga 2


In [136]:
# Number of entries currently held in `league_year_match_links`.
len(league_year_match_links)

306

In [114]:
# Distinct (year, league) pairs present in `five_x_fails`, ordered by league then year.
(
    five_x_fails[['year', 'league']]
    .drop_duplicates()
    .sort_values(by=['league', 'year'])
    .reset_index(drop=True)
)

Unnamed: 0,year,league
0,2012,Bundesliga
1,2013,Bundesliga
2,2018,Bundesliga 2
3,2017,EFL Championship
4,2019,Eredivisie
5,2019,La Liga
6,2019,Ligue 1
7,2019,Ligue 2
8,2012,Serie A
9,2013,Serie A


In [147]:
# Retry pass: revisit every (league, year) row of the failure log whose `link` is the
# '5x fail, skip' marker, re-gather that season's match links, and persist whatever was
# collected plus a log of the seasons that still fail.
failed_attempts_df = pd.read_parquet('failed_scrape_attempts.parquet')
five_x_fails = failed_attempts_df[failed_attempts_df['link'] == '5x fail, skip'].drop_duplicates()
failed_attempts = {'league': [], 'year': [], 'link': []}
five_x_fail_matches = pd.DataFrame()

# The row label is unused; it gets its own name so the inner loop's `i` no longer shadows it.
for _row_label, row in five_x_fails.iterrows():
    league = row['league']
    year = int(row['year'])
    try:
        # set() drops duplicate links; the resulting order is arbitrary.
        league_year_match_links = list(set(get_match_links(year, league)))
    except Exception as ex:
        # Log the season with the loop's own league/year. Recovering them by comma-splitting
        # str(ex) depends on the exact message layout: a differently shaped message yields
        # misaligned values (or an IndexError inside this handler) and overwrites the loop
        # variables used below. Keeping `year` an int also keeps the column single-typed
        # for the parquet write at the end of the cell.
        print(f'could not gather match links for {league}, {year} ({type(ex).__name__})')
        failed_attempts['league'].append(league)
        failed_attempts['year'].append(year)
        failed_attempts['link'].append('no links for this season + year')
        league_year_match_links = []
    # Counters used by the (currently disabled) per-match scraping body below.
    repeat_fail_counts = 0
    last_fail_i = 0
    total_matches = len(league_year_match_links)
    one_year_one_league = pd.DataFrame()
    print(f'starting {league}, {year}')
    for i, link in enumerate(league_year_match_links):
        # NOTE(review): the per-match scraping body is commented out, so this loop is a no-op
        # and `one_year_one_league` stays empty. Re-enable the block below to actually scrape.
        pass
        # try:
        #     # clear console, can't use fancy sys.out commands like "\033[K" because they don't work in jupyter notebook
        #     print("                                                                                             ", end="\r")
        #     print(f"scraping match, progress = {i+1}/{total_matches}", end="\r")
        #     start_time = perf_counter()
        #     one_match = scrape_match(link)
        #     parse_time = perf_counter()-start_time
        #     if one_match is not None:
        #         one_match['league'] = league
        #         one_match['year'] = year
        #     if parse_time < 4:
        #         time.sleep(4-parse_time)
        #     one_year_one_league = pd.concat([one_year_one_league, one_match], ignore_index=True)
        # except Exception as err:
        #     print(f'Exception {err}')
        #     if i == last_fail_i + 1:
        #         repeat_fail_counts += 1
        #     last_fail_i = i
        #     if repeat_fail_counts < 5:
        #         print(f'failed attempt {league}, {year}, {link}')
        #         failed_attempts['league'].append(league)
        #         failed_attempts['year'].append(year)
        #         failed_attempts['link'].append(link)
        #     else:
        #         print(f'failed 5 times, skipping {league}, {year}')
        #         failed_attempts['league'].append(league)
        #         failed_attempts['year'].append(year)
        #         failed_attempts['link'].append('5x fail, skip')
        #         break
        #     time.sleep(6)
        #     continue
    if one_year_one_league.shape[0] > 0:
        print(f"finished with {year} for {league}, num matches {total_matches}                           ")
        one_year_one_league['league'] = league
        one_year_one_league['year'] = year
        five_x_fail_matches = pd.concat([five_x_fail_matches, one_year_one_league], ignore_index=True)
        # Checkpoint after every season so an interrupted run keeps the seasons already scraped.
        five_x_fail_matches.reset_index(drop=True).to_pickle('C:\\Users\\Alec\\Documents\\py\\football_stats\\matches\\five_x_fail_progress2_FBRef_scrape.pkl')

if five_x_fail_matches.shape[0] > 0:
    # NOTE(review): unlike the progress file, this path has no '.pkl' suffix; left unchanged in
    # case other code reads this exact filename - confirm and align.
    five_x_fail_matches.reset_index(drop=True).to_pickle('C:\\Users\\Alec\\Documents\\py\\football_stats\\matches\\five_x_fail_final2_FBRef_scrape')
# Rebinds `failed_attempts_df` to the failures recorded by THIS run and writes them out.
failed_attempts_df = pd.DataFrame(failed_attempts, index=range(len(failed_attempts['league'])))
failed_attempts_df.to_parquet('C:\\Users\\Alec\\Documents\\py\\football_stats\\matches\\still_failed.parquet')

Gathering match links 2019, Ligue 2


KeyboardInterrupt: 

In [146]:
# Display the current contents of the failed-attempts table.
failed_attempts_df

Unnamed: 0,league,year,link
0,{'All': {},Serie B,no links for this season + year
1,{'All': {},Serie B,no links for this season + year
2,Serie A,2012,https://fbref.com/en/matches/c5d02381/Napoli-P...
3,Serie A,2012,https://fbref.com/en/matches/8f974e95/Cagliari...
4,Serie A,2012,https://fbref.com/en/matches/734ca415/Torino-F...
5,Serie A,2012,https://fbref.com/en/matches/00a51003/Fiorenti...
6,Serie A,2012,https://fbref.com/en/matches/b449f479/Udinese-...
7,Serie A,2012,"5x fail, skip"
8,Serie A,2013,no links for this season + year
9,Serie A,2021,no links for this season + year


In [108]:
def more_fails():
    '''
    Gathering match links 2019, Ligue 2
starting Ligue 2, 2019
match canceled, https://fbref.com/en/matches/2191ad75/Orleans-Nancy-May-15-2020-Ligue-2      
match canceled, https://fbref.com/en/matches/f03e4138/Chambly-Oise-Le-Havre-May-8-2020-Ligue-2
match canceled, https://fbref.com/en/matches/6bafd83b/Orleans-Caen-April-3-2020-Ligue-2      
match canceled, https://fbref.com/en/matches/59de48a2/Nancy-Ajaccio-March-13-2020-Ligue-2    
match canceled, https://fbref.com/en/matches/273c3550/Le-Havre-Ajaccio-May-15-2020-Ligue-2   
match canceled, https://fbref.com/en/matches/520788c3/Nancy-Grenoble-Foot-April-17-2020-Ligue-2
match canceled, https://fbref.com/en/matches/e1a38701/Le-Mans-Troyes-March-13-2020-Ligue-2   
match canceled, https://fbref.com/en/matches/05244e1a/Guingamp-Paris-FC-April-24-2020-Ligue-2
match canceled, https://fbref.com/en/matches/32756139/Le-Havre-Sochaux-April-10-2020-Ligue-2 
match canceled, https://fbref.com/en/matches/c41e05ef/Valenciennes-Caen-March-13-2020-Ligue-2
match canceled, https://fbref.com/en/matches/7fc28eb3/Guingamp-Chambly-Oise-April-3-2020-Ligue-2
match canceled, https://fbref.com/en/matches/835b6cf1/Le-Havre-Rodez-Aveyron-April-21-2020-Ligue-2
match canceled, https://fbref.com/en/matches/cad91865/Niort-Lens-May-8-2020-Ligue-2          
match canceled, https://fbref.com/en/matches/56141f88/Le-Havre-Valenciennes-March-20-2020-Ligue-2
match canceled, https://fbref.com/en/matches/8719fb8a/Lens-Le-Mans-May-15-2020-Ligue-2       
match canceled, https://fbref.com/en/matches/396ff23a/Caen-Rodez-Aveyron-May-1-2020-Ligue-2  
match canceled, https://fbref.com/en/matches/9d3eb089/Clermont-Foot-Chambly-Oise-April-10-2020-Ligue-2
match canceled, https://fbref.com/en/matches/122548c1/Chateauroux-Orleans-March-20-2020-Ligue-2
match canceled, https://fbref.com/en/matches/fbaa1a87/Sochaux-Niort-April-3-2020-Ligue-2     
match canceled, https://fbref.com/en/matches/6b38ed3e/Clermont-Foot-Ajaccio-April-17-2020-Ligue-2
match canceled, https://fbref.com/en/matches/353b207b/Valenciennes-Orleans-April-17-2020-Ligue-2
match canceled, https://fbref.com/en/matches/b1384a94/Paris-FC-Orleans-April-10-2020-Ligue-2 
match canceled, https://fbref.com/en/matches/f4b93fd3/Paris-FC-Valenciennes-May-1-2020-Ligue-2
match canceled, https://fbref.com/en/matches/bcbd8eb0/Chateauroux-Clermont-Foot-May-15-2020-Ligue-2
match canceled, https://fbref.com/en/matches/647c2a38/Ajaccio-Chambly-Oise-April-21-2020-Ligue-2
match canceled, https://fbref.com/en/matches/1edbb43b/Valenciennes-Chambly-Oise-May-15-2020-Ligue-2
match canceled, https://fbref.com/en/matches/7c1d470e/Sochaux-Chateauroux-April-24-2020-Ligue-2
match canceled, https://fbref.com/en/matches/55e366c5/Ajaccio-Guingamp-April-10-2020-Ligue-2 
match canceled, https://fbref.com/en/matches/5d751adf/Grenoble-Foot-Sochaux-May-1-2020-Ligue-2
match canceled, https://fbref.com/en/matches/f9ebf084/Orleans-Auxerre-April-21-2020-Ligue-2  
match canceled, https://fbref.com/en/matches/6af94ea9/Paris-FC-Nancy-April-21-2020-Ligue-2   
match canceled, https://fbref.com/en/matches/ef22a9ef/Le-Havre-Clermont-Foot-May-1-2020-Ligue-2
match canceled, https://fbref.com/en/matches/a2595146/Niort-Lorient-April-10-2020-Ligue-2    
match canceled, https://fbref.com/en/matches/1273db5c/Lorient-Le-Havre-March-16-2020-Ligue-2 
match canceled, https://fbref.com/en/matches/25069b64/Lorient-Valenciennes-May-8-2020-Ligue-2
match canceled, https://fbref.com/en/matches/9b60f289/Paris-FC-Grenoble-Foot-April-3-2020-Ligue-2
match canceled, https://fbref.com/en/matches/a39cebee/Auxerre-Chateauroux-April-3-2020-Ligue-2
match canceled, https://fbref.com/en/matches/b10a869c/Valenciennes-Troyes-April-3-2020-Ligue-2
match canceled, https://fbref.com/en/matches/2225bfaf/Rodez-Aveyron-Ajaccio-April-3-2020-Ligue-2
match canceled, https://fbref.com/en/matches/59ff1690/Lorient-Chateauroux-April-17-2020-Ligue-2
match canceled, https://fbref.com/en/matches/1bac8560/Sochaux-Chambly-Oise-March-13-2020-Ligue-2
match canceled, https://fbref.com/en/matches/82fd3b73/Sochaux-Lens-April-17-2020-Ligue-2     
match canceled, https://fbref.com/en/matches/a8986453/Le-Mans-Paris-FC-May-8-2020-Ligue-2    
match canceled, https://fbref.com/en/matches/4e7d6b99/Chambly-Oise-Troyes-April-17-2020-Ligue-2
match canceled, https://fbref.com/en/matches/ef814e00/Chambly-Oise-Rodez-Aveyron-March-20-2020-Ligue-2
match canceled, https://fbref.com/en/matches/f50de72e/Troyes-Lorient-March-21-2020-Ligue-2   
match canceled, https://fbref.com/en/matches/dea3d1f6/Nancy-Auxerre-May-8-2020-Ligue-2       
match canceled, https://fbref.com/en/matches/e0a63c91/Le-Mans-Caen-April-17-2020-Ligue-2     
match canceled, https://fbref.com/en/matches/ec569e89/Troyes-Niort-May-15-2020-Ligue-2       
match canceled, https://fbref.com/en/matches/f46ab77c/Niort-Chambly-Oise-May-1-2020-Ligue-2  
match canceled, https://fbref.com/en/matches/b898f7eb/Grenoble-Foot-Guingamp-May-15-2020-Ligue-2
match canceled, https://fbref.com/en/matches/17cc871c/Le-Mans-Clermont-Foot-April-3-2020-Ligue-2
match canceled, https://fbref.com/en/matches/ef530c4c/Ajaccio-Sochaux-March-20-2020-Ligue-2  
match canceled, https://fbref.com/en/matches/8d12cfeb/Auxerre-Rodez-Aveyron-May-15-2020-Ligue-2
match canceled, https://fbref.com/en/matches/e2e14967/Grenoble-Foot-Lorient-April-21-2020-Ligue-2
match canceled, https://fbref.com/en/matches/32568052/Rodez-Aveyron-Clermont-Foot-March-13-2020-Ligue-2
match canceled, https://fbref.com/en/matches/b94dfe4a/Clermont-Foot-Caen-May-8-2020-Ligue-2  
match canceled, https://fbref.com/en/matches/231fde60/Caen-Guingamp-April-21-2020-Ligue-2    
match canceled, https://fbref.com/en/matches/932a192d/Auxerre-Paris-FC-April-17-2020-Ligue-2 
match canceled, https://fbref.com/en/matches/e1b57c71/Lens-Rodez-Aveyron-April-10-2020-Ligue-2
match canceled, https://fbref.com/en/matches/3da3311b/Lorient-Lens-April-4-2020-Ligue-2      
match canceled, https://fbref.com/en/matches/f0059b76/Chambly-Oise-Lens-April-24-2020-Ligue-2
match canceled, https://fbref.com/en/matches/6c920e93/Clermont-Foot-Grenoble-Foot-April-24-2020-Ligue-2
match canceled, https://fbref.com/en/matches/4ae4164d/Chateauroux-Le-Mans-April-21-2020-Ligue-2
match canceled, https://fbref.com/en/matches/7f5f809b/Rodez-Aveyron-Grenoble-Foot-May-8-2020-Ligue-2
match canceled, https://fbref.com/en/matches/5929761f/Guingamp-Le-Havre-April-17-2020-Ligue-2
match canceled, https://fbref.com/en/matches/d18fc19d/Guingamp-Chateauroux-May-8-2020-Ligue-2
match canceled, https://fbref.com/en/matches/bce0f8c8/Rodez-Aveyron-Troyes-April-24-2020-Ligue-2
match canceled, https://fbref.com/en/matches/23896136/Caen-Auxerre-April-10-2020-Ligue-2     
match canceled, https://fbref.com/en/matches/a8771278/Lens-Ajaccio-May-1-2020-Ligue-2        
match canceled, https://fbref.com/en/matches/2b976988/Paris-FC-Lorient-May-15-2020-Ligue-2   
match canceled, https://fbref.com/en/matches/f34647cc/Sochaux-Orleans-May-8-2020-Ligue-2     
match canceled, https://fbref.com/en/matches/9b1ef3f3/Lens-Nancy-March-23-2020-Ligue-2       
match canceled, https://fbref.com/en/matches/e18639c5/Chateauroux-Nancy-May-1-2020-Ligue-2   
match canceled, https://fbref.com/en/matches/14e6ea06/Troyes-Guingamp-May-1-2020-Ligue-2     
match canceled, https://fbref.com/en/matches/579ba60b/Grenoble-Foot-Auxerre-March-20-2020-Ligue-2
match canceled, https://fbref.com/en/matches/501b08be/Nancy-Le-Havre-April-6-2020-Ligue-2    
match canceled, https://fbref.com/en/matches/14725764/Orleans-Le-Mans-May-1-2020-Ligue-2     
match canceled, https://fbref.com/en/matches/7b0d6553/Niort-Clermont-Foot-April-21-2020-Ligue-2
match canceled, https://fbref.com/en/matches/65dea7df/Caen-Paris-FC-March-20-2020-Ligue-2    
match canceled, https://fbref.com/en/matches/20bff44c/Troyes-Nancy-April-10-2020-Ligue-2     
match canceled, https://fbref.com/en/matches/2b729d39/Grenoble-Foot-Le-Mans-April-10-2020-Ligue-2
match canceled, https://fbref.com/en/matches/a8ac15fe/Chateauroux-Valenciennes-April-10-2020-Ligue-2
match canceled, https://fbref.com/en/matches/6d29dff0/Valenciennes-Auxerre-April-24-2020-Ligue-2
match canceled, https://fbref.com/en/matches/7eb18c4b/Lens-Valenciennes-April-21-2020-Ligue-2
match canceled, https://fbref.com/en/matches/e754cc9e/Lorient-Orleans-April-24-2020-Ligue-2  
match canceled, https://fbref.com/en/matches/0d57a1d2/Guingamp-Niort-March-13-2020-Ligue-2   
match canceled, https://fbref.com/en/matches/2fb4636e/Le-Mans-Le-Havre-April-24-2020-Ligue-2 
match canceled, https://fbref.com/en/matches/7abbf758/Ajaccio-Troyes-May-8-2020-Ligue-2      
match canceled, https://fbref.com/en/matches/d548d6bb/Auxerre-Lorient-May-1-2020-Ligue-2     
match canceled, https://fbref.com/en/matches/c270961f/Orleans-Grenoble-Foot-March-13-2020-Ligue-2
match canceled, https://fbref.com/en/matches/4e0672dc/Troyes-Sochaux-April-21-2020-Ligue-2   
match canceled, https://fbref.com/en/matches/86182446/Ajaccio-Niort-April-24-2020-Ligue-2    
match canceled, https://fbref.com/en/matches/6c25ddb8/Nancy-Caen-April-24-2020-Ligue-2       
match canceled, https://fbref.com/en/matches/e67c9daa/Rodez-Aveyron-Niort-April-17-2020-Ligue-2
match canceled, https://fbref.com/en/matches/057edc26/Clermont-Foot-Guingamp-March-20-2020-Ligue-2
match canceled, https://fbref.com/en/matches/d9ba672e/Auxerre-Lens-March-14-2020-Ligue-2     
match canceled, https://fbref.com/en/matches/45e4cc38/Paris-FC-Chateauroux-March-13-2020-Ligue-2
match canceled, https://fbref.com/en/matches/8a9e5b11/Niort-Le-Mans-March-20-2020-Ligue-2    
match canceled, https://fbref.com/en/matches/605fdc57/Caen-Sochaux-May-15-2020-Ligue-2       
finished with 2019 for Ligue 2                                                               
Gathering match links 2012, Bundesliga
starting Bundesliga, 2012
finished with 2012 for Bundesliga                                                            
Gathering match links 2013, Bundesliga
starting Bundesliga, 2013
finished with 2013 for Bundesliga                                                            
Gathering match links 2018, Bundesliga 2
starting Bundesliga 2, 2018
finished with 2018 for Bundesliga 2                                                          
Gathering match links 2019, Eredivisie
starting Eredivisie, 2019
match canceled, https://fbref.com/en/matches/cd79a296/Twente-Fortuna-Sittard-April-5-2020-Eredivisie
match canceled, https://fbref.com/en/matches/a32440d4/Zwolle-Twente-April-12-2020-Eredivisie 
match canceled, https://fbref.com/en/matches/700e202d/Fortuna-Sittard-PSV-Eindhoven-March-22-2020-Eredivisie
match canceled, https://fbref.com/en/matches/1363cecc/Heerenveen-RKC-Waalwijk-March-21-2020-Eredivisie
match canceled, https://fbref.com/en/matches/b1cbaddc/Fortuna-Sittard-Utrecht-April-12-2020-Eredivisie
match canceled, https://fbref.com/en/matches/1f417dd5/AZ-Alkmaar-Vitesse-April-4-2020-Eredivisie
match canceled, https://fbref.com/en/matches/2c3fb439/Heerenveen-Emmen-April-22-2020-Eredivisie
match canceled, https://fbref.com/en/matches/d39493be/AZ-Alkmaar-Utrecht-May-3-2020-Eredivisie
match canceled, https://fbref.com/en/matches/9c300370/Heracles-Almelo-Groningen-May-10-2020-Eredivisie
match canceled, https://fbref.com/en/matches/32c7f488/Zwolle-Heracles-Almelo-March-14-2020-Eredivisie
match canceled, https://fbref.com/en/matches/2a3179ea/Vitesse-RKC-Waalwijk-April-11-2020-Eredivisie
match canceled, https://fbref.com/en/matches/9be27ae4/Emmen-Ajax-April-12-2020-Eredivisie    
match canceled, https://fbref.com/en/matches/26429ec2/De-Klassieker-Feyenoord-Ajax-March-22-2020-Eredivisie
match canceled, https://fbref.com/en/matches/e2a21e33/ADO-Den-Haag-Feyenoord-April-22-2020-Eredivisie
match canceled, https://fbref.com/en/matches/84b2985a/Heracles-Almelo-Sparta-Rotterdam-March-20-2020-Eredivisie
match canceled, https://fbref.com/en/matches/08c8bd08/VVV-Venlo-ADO-Den-Haag-April-26-2020-Eredivisie
match canceled, https://fbref.com/en/matches/979c726a/RKC-Waalwijk-Feyenoord-April-5-2020-Eredivisie
match canceled, https://fbref.com/en/matches/b970fade/Ajax-Vitesse-April-23-2020-Eredivisie  
match canceled, https://fbref.com/en/matches/43bbed16/AZ-Alkmaar-Feyenoord-April-8-2020-Eredivisie
match canceled, https://fbref.com/en/matches/0d9233ef/Twente-VVV-Venlo-April-23-2020-Eredivisie
match canceled, https://fbref.com/en/matches/d4c08acf/PSV-Eindhoven-Emmen-March-14-2020-Eredivisie
match canceled, https://fbref.com/en/matches/2909c0b3/Willem-II-Heerenveen-March-14-2020-Eredivisie
match canceled, https://fbref.com/en/matches/5686b200/PSV-Eindhoven-Sparta-Rotterdam-April-12-2020-Eredivisie
match canceled, https://fbref.com/en/matches/b232d911/Fortuna-Sittard-AZ-Alkmaar-April-25-2020-Eredivisie
match canceled, https://fbref.com/en/matches/034174b3/Willem-II-Twente-May-10-2020-Eredivisie
match canceled, https://fbref.com/en/matches/c0c07037/Heerenveen-PSV-Eindhoven-May-3-2020-Eredivisie
match canceled, https://fbref.com/en/matches/d5ab0a54/Ajax-Twente-March-15-2020-Eredivisie   
match canceled, https://fbref.com/en/matches/36155374/Ajax-VVV-Venlo-May-3-2020-Eredivisie   
match canceled, https://fbref.com/en/matches/a5f77799/Fortuna-Sittard-Ajax-May-10-2020-Eredivisie
match canceled, https://fbref.com/en/matches/746b307e/PSV-Eindhoven-Utrecht-April-25-2020-Eredivisie
match canceled, https://fbref.com/en/matches/7a2c9d97/Heracles-Almelo-AZ-Alkmaar-April-11-2020-Eredivisie
match canceled, https://fbref.com/en/matches/d59fc411/Feyenoord-Groningen-April-25-2020-Eredivisie
match canceled, https://fbref.com/en/matches/e8677ffc/Utrecht-Vitesse-March-14-2020-Eredivisie
match canceled, https://fbref.com/en/matches/b9b52377/Sparta-Rotterdam-Feyenoord-March-15-2020-Eredivisie
match canceled, https://fbref.com/en/matches/7d023b29/Sparta-Rotterdam-Heerenveen-April-4-2020-Eredivisie
match canceled, https://fbref.com/en/matches/1a6b8477/Utrecht-Heracles-Almelo-April-22-2020-Eredivisie
match canceled, https://fbref.com/en/matches/205c7221/ADO-Den-Haag-Zwolle-May-10-2020-Eredivisie
match canceled, https://fbref.com/en/matches/6ca3f188/Willem-II-ADO-Den-Haag-April-11-2020-Eredivisie
match canceled, https://fbref.com/en/matches/3bbc8495/RKC-Waalwijk-Fortuna-Sittard-May-3-2020-Eredivisie
match canceled, https://fbref.com/en/matches/20a9cdc2/AZ-Alkmaar-PSV-Eindhoven-April-21-2020-Eredivisie
match canceled, https://fbref.com/en/matches/5047d9bc/Feyenoord-Vitesse-May-10-2020-Eredivisie
match canceled, https://fbref.com/en/matches/09445052/Ajax-Zwolle-April-5-2020-Eredivisie    
match canceled, https://fbref.com/en/matches/b7bd8625/Zwolle-VVV-Venlo-March-21-2020-Eredivisie
match canceled, https://fbref.com/en/matches/87b12cbb/Twente-ADO-Den-Haag-March-21-2020-Eredivisie
match canceled, https://fbref.com/en/matches/17388e4d/VVV-Venlo-Willem-II-April-4-2020-Eredivisie
match canceled, https://fbref.com/en/matches/bbd20c6e/Emmen-AZ-Alkmaar-May-10-2020-Eredivisie
match canceled, https://fbref.com/en/matches/1dd4751a/RKC-Waalwijk-Willem-II-April-22-2020-Eredivisie
match canceled, https://fbref.com/en/matches/488ad085/Heracles-Almelo-Twente-April-26-2020-Eredivisie
match canceled, https://fbref.com/en/matches/538029df/Utrecht-Groningen-April-4-2020-Eredivisie
match canceled, https://fbref.com/en/matches/80704664/Zwolle-Emmen-May-3-2020-Eredivisie     
match canceled, https://fbref.com/en/matches/ed5b4c1c/Utrecht-Heerenveen-May-10-2020-Eredivisie
match canceled, https://fbref.com/en/matches/52c0dbc6/Groningen-Heerenveen-April-10-2020-Eredivisie
match canceled, https://fbref.com/en/matches/96833911/Vitesse-Willem-II-March-21-2020-Eredivisie
match canceled, https://fbref.com/en/matches/97cb9cc7/RKC-Waalwijk-Groningen-March-15-2020-Eredivisie
match canceled, https://fbref.com/en/matches/44e3bdca/ADO-Den-Haag-Emmen-April-5-2020-Eredivisie
match canceled, https://fbref.com/en/matches/e4c3b865/Willem-II-Ajax-April-26-2020-Eredivisie
match canceled, https://fbref.com/en/matches/86facbaf/Vitesse-Sparta-Rotterdam-April-26-2020-Eredivisie
match canceled, https://fbref.com/en/matches/39f2f8f8/VVV-Venlo-AZ-Alkmaar-March-15-2020-Eredivisie
match canceled, https://fbref.com/en/matches/e7f1d278/Zwolle-Heerenveen-April-25-2020-Eredivisie
match canceled, https://fbref.com/en/matches/db400d3f/Vitesse-Heracles-Almelo-May-3-2020-Eredivisie
match canceled, https://fbref.com/en/matches/50dfc13f/Emmen-Utrecht-March-22-2020-Eredivisie 
match canceled, https://fbref.com/en/matches/0a8197f2/PSV-Eindhoven-Heracles-Almelo-April-4-2020-Eredivisie
match canceled, https://fbref.com/en/matches/4e3b5f58/Utrecht-Ajax-April-9-2020-Eredivisie   
match canceled, https://fbref.com/en/matches/4b282bb9/Feyenoord-VVV-Venlo-April-11-2020-Eredivisie
match canceled, https://fbref.com/en/matches/c8ce271d/VVV-Venlo-Sparta-Rotterdam-May-10-2020-Eredivisie
match canceled, https://fbref.com/en/matches/4cf846fb/Sparta-Rotterdam-Willem-II-May-3-2020-Eredivisie
match canceled, https://fbref.com/en/matches/846c03b5/Groningen-AZ-Alkmaar-March-22-2020-Eredivisie
match canceled, https://fbref.com/en/matches/c339b4a2/Emmen-RKC-Waalwijk-April-25-2020-Eredivisie
match canceled, https://fbref.com/en/matches/5216887f/ADO-Den-Haag-Fortuna-Sittard-March-13-2020-Eredivisie
match canceled, https://fbref.com/en/matches/777d95f8/PSV-Eindhoven-RKC-Waalwijk-May-10-2020-Eredivisie
match canceled, https://fbref.com/en/matches/67b1a464/Groningen-Fortuna-Sittard-April-22-2020-Eredivisie
match canceled, https://fbref.com/en/matches/b8d2edda/Groningen-ADO-Den-Haag-May-3-2020-Eredivisie
match canceled, https://fbref.com/en/matches/4a93d50b/Sparta-Rotterdam-Zwolle-April-21-2020-Eredivisie
match canceled, https://fbref.com/en/matches/cd64efd0/Twente-Feyenoord-May-3-2020-Eredivisie 
finished with 2019 for Eredivisie                                                            
Gathering match links 2017, EFL Championship
starting EFL Championship, 2017
finished with 2017 for EFL Championship                                                      
starting {'All': {}, Serie B
finished with Serie B for {'All': {}                           
Gathering match links 2019, La Liga
starting La Liga, 2019
finished with 2019 for La Liga                                                               
Gathering match links 2019, Ligue 2
starting Ligue 2, 2019
match canceled, https://fbref.com/en/matches/2191ad75/Orleans-Nancy-May-15-2020-Ligue-2      
match canceled, https://fbref.com/en/matches/f03e4138/Chambly-Oise-Le-Havre-May-8-2020-Ligue-2
match canceled, https://fbref.com/en/matches/6bafd83b/Orleans-Caen-April-3-2020-Ligue-2      
match canceled, https://fbref.com/en/matches/59de48a2/Nancy-Ajaccio-March-13-2020-Ligue-2    
match canceled, https://fbref.com/en/matches/273c3550/Le-Havre-Ajaccio-May-15-2020-Ligue-2   
match canceled, https://fbref.com/en/matches/520788c3/Nancy-Grenoble-Foot-April-17-2020-Ligue-2
match canceled, https://fbref.com/en/matches/e1a38701/Le-Mans-Troyes-March-13-2020-Ligue-2   
match canceled, https://fbref.com/en/matches/05244e1a/Guingamp-Paris-FC-April-24-2020-Ligue-2
match canceled, https://fbref.com/en/matches/32756139/Le-Havre-Sochaux-April-10-2020-Ligue-2 
match canceled, https://fbref.com/en/matches/c41e05ef/Valenciennes-Caen-March-13-2020-Ligue-2
match canceled, https://fbref.com/en/matches/7fc28eb3/Guingamp-Chambly-Oise-April-3-2020-Ligue-2
match canceled, https://fbref.com/en/matches/835b6cf1/Le-Havre-Rodez-Aveyron-April-21-2020-Ligue-2
match canceled, https://fbref.com/en/matches/cad91865/Niort-Lens-May-8-2020-Ligue-2          
match canceled, https://fbref.com/en/matches/56141f88/Le-Havre-Valenciennes-March-20-2020-Ligue-2
match canceled, https://fbref.com/en/matches/8719fb8a/Lens-Le-Mans-May-15-2020-Ligue-2       
match canceled, https://fbref.com/en/matches/396ff23a/Caen-Rodez-Aveyron-May-1-2020-Ligue-2  
match canceled, https://fbref.com/en/matches/9d3eb089/Clermont-Foot-Chambly-Oise-April-10-2020-Ligue-2
match canceled, https://fbref.com/en/matches/122548c1/Chateauroux-Orleans-March-20-2020-Ligue-2
match canceled, https://fbref.com/en/matches/fbaa1a87/Sochaux-Niort-April-3-2020-Ligue-2     
match canceled, https://fbref.com/en/matches/6b38ed3e/Clermont-Foot-Ajaccio-April-17-2020-Ligue-2
match canceled, https://fbref.com/en/matches/353b207b/Valenciennes-Orleans-April-17-2020-Ligue-2
match canceled, https://fbref.com/en/matches/b1384a94/Paris-FC-Orleans-April-10-2020-Ligue-2 
match canceled, https://fbref.com/en/matches/f4b93fd3/Paris-FC-Valenciennes-May-1-2020-Ligue-2
match canceled, https://fbref.com/en/matches/bcbd8eb0/Chateauroux-Clermont-Foot-May-15-2020-Ligue-2
match canceled, https://fbref.com/en/matches/647c2a38/Ajaccio-Chambly-Oise-April-21-2020-Ligue-2
match canceled, https://fbref.com/en/matches/1edbb43b/Valenciennes-Chambly-Oise-May-15-2020-Ligue-2
match canceled, https://fbref.com/en/matches/7c1d470e/Sochaux-Chateauroux-April-24-2020-Ligue-2
match canceled, https://fbref.com/en/matches/55e366c5/Ajaccio-Guingamp-April-10-2020-Ligue-2 
match canceled, https://fbref.com/en/matches/5d751adf/Grenoble-Foot-Sochaux-May-1-2020-Ligue-2
match canceled, https://fbref.com/en/matches/f9ebf084/Orleans-Auxerre-April-21-2020-Ligue-2  
match canceled, https://fbref.com/en/matches/6af94ea9/Paris-FC-Nancy-April-21-2020-Ligue-2   
match canceled, https://fbref.com/en/matches/ef22a9ef/Le-Havre-Clermont-Foot-May-1-2020-Ligue-2
match canceled, https://fbref.com/en/matches/a2595146/Niort-Lorient-April-10-2020-Ligue-2    
match canceled, https://fbref.com/en/matches/1273db5c/Lorient-Le-Havre-March-16-2020-Ligue-2 
match canceled, https://fbref.com/en/matches/25069b64/Lorient-Valenciennes-May-8-2020-Ligue-2
match canceled, https://fbref.com/en/matches/9b60f289/Paris-FC-Grenoble-Foot-April-3-2020-Ligue-2
match canceled, https://fbref.com/en/matches/a39cebee/Auxerre-Chateauroux-April-3-2020-Ligue-2
match canceled, https://fbref.com/en/matches/b10a869c/Valenciennes-Troyes-April-3-2020-Ligue-2
match canceled, https://fbref.com/en/matches/2225bfaf/Rodez-Aveyron-Ajaccio-April-3-2020-Ligue-2
match canceled, https://fbref.com/en/matches/59ff1690/Lorient-Chateauroux-April-17-2020-Ligue-2
match canceled, https://fbref.com/en/matches/1bac8560/Sochaux-Chambly-Oise-March-13-2020-Ligue-2
match canceled, https://fbref.com/en/matches/82fd3b73/Sochaux-Lens-April-17-2020-Ligue-2     
match canceled, https://fbref.com/en/matches/a8986453/Le-Mans-Paris-FC-May-8-2020-Ligue-2    
match canceled, https://fbref.com/en/matches/4e7d6b99/Chambly-Oise-Troyes-April-17-2020-Ligue-2
match canceled, https://fbref.com/en/matches/ef814e00/Chambly-Oise-Rodez-Aveyron-March-20-2020-Ligue-2
match canceled, https://fbref.com/en/matches/f50de72e/Troyes-Lorient-March-21-2020-Ligue-2   
match canceled, https://fbref.com/en/matches/dea3d1f6/Nancy-Auxerre-May-8-2020-Ligue-2       
match canceled, https://fbref.com/en/matches/e0a63c91/Le-Mans-Caen-April-17-2020-Ligue-2     
match canceled, https://fbref.com/en/matches/ec569e89/Troyes-Niort-May-15-2020-Ligue-2       
match canceled, https://fbref.com/en/matches/f46ab77c/Niort-Chambly-Oise-May-1-2020-Ligue-2  
match canceled, https://fbref.com/en/matches/b898f7eb/Grenoble-Foot-Guingamp-May-15-2020-Ligue-2
match canceled, https://fbref.com/en/matches/17cc871c/Le-Mans-Clermont-Foot-April-3-2020-Ligue-2
match canceled, https://fbref.com/en/matches/ef530c4c/Ajaccio-Sochaux-March-20-2020-Ligue-2  
match canceled, https://fbref.com/en/matches/8d12cfeb/Auxerre-Rodez-Aveyron-May-15-2020-Ligue-2
match canceled, https://fbref.com/en/matches/e2e14967/Grenoble-Foot-Lorient-April-21-2020-Ligue-2
match canceled, https://fbref.com/en/matches/32568052/Rodez-Aveyron-Clermont-Foot-March-13-2020-Ligue-2
match canceled, https://fbref.com/en/matches/b94dfe4a/Clermont-Foot-Caen-May-8-2020-Ligue-2  
match canceled, https://fbref.com/en/matches/231fde60/Caen-Guingamp-April-21-2020-Ligue-2    
match canceled, https://fbref.com/en/matches/932a192d/Auxerre-Paris-FC-April-17-2020-Ligue-2 
match canceled, https://fbref.com/en/matches/e1b57c71/Lens-Rodez-Aveyron-April-10-2020-Ligue-2
match canceled, https://fbref.com/en/matches/3da3311b/Lorient-Lens-April-4-2020-Ligue-2      
match canceled, https://fbref.com/en/matches/f0059b76/Chambly-Oise-Lens-April-24-2020-Ligue-2
match canceled, https://fbref.com/en/matches/6c920e93/Clermont-Foot-Grenoble-Foot-April-24-2020-Ligue-2
match canceled, https://fbref.com/en/matches/4ae4164d/Chateauroux-Le-Mans-April-21-2020-Ligue-2
match canceled, https://fbref.com/en/matches/7f5f809b/Rodez-Aveyron-Grenoble-Foot-May-8-2020-Ligue-2
match canceled, https://fbref.com/en/matches/5929761f/Guingamp-Le-Havre-April-17-2020-Ligue-2
match canceled, https://fbref.com/en/matches/d18fc19d/Guingamp-Chateauroux-May-8-2020-Ligue-2
match canceled, https://fbref.com/en/matches/bce0f8c8/Rodez-Aveyron-Troyes-April-24-2020-Ligue-2
match canceled, https://fbref.com/en/matches/23896136/Caen-Auxerre-April-10-2020-Ligue-2     
match canceled, https://fbref.com/en/matches/a8771278/Lens-Ajaccio-May-1-2020-Ligue-2        
match canceled, https://fbref.com/en/matches/2b976988/Paris-FC-Lorient-May-15-2020-Ligue-2   
match canceled, https://fbref.com/en/matches/f34647cc/Sochaux-Orleans-May-8-2020-Ligue-2     
match canceled, https://fbref.com/en/matches/9b1ef3f3/Lens-Nancy-March-23-2020-Ligue-2       
match canceled, https://fbref.com/en/matches/e18639c5/Chateauroux-Nancy-May-1-2020-Ligue-2   
match canceled, https://fbref.com/en/matches/14e6ea06/Troyes-Guingamp-May-1-2020-Ligue-2     
match canceled, https://fbref.com/en/matches/579ba60b/Grenoble-Foot-Auxerre-March-20-2020-Ligue-2
match canceled, https://fbref.com/en/matches/501b08be/Nancy-Le-Havre-April-6-2020-Ligue-2    
match canceled, https://fbref.com/en/matches/14725764/Orleans-Le-Mans-May-1-2020-Ligue-2     
match canceled, https://fbref.com/en/matches/7b0d6553/Niort-Clermont-Foot-April-21-2020-Ligue-2
match canceled, https://fbref.com/en/matches/65dea7df/Caen-Paris-FC-March-20-2020-Ligue-2    
match canceled, https://fbref.com/en/matches/20bff44c/Troyes-Nancy-April-10-2020-Ligue-2     
match canceled, https://fbref.com/en/matches/2b729d39/Grenoble-Foot-Le-Mans-April-10-2020-Ligue-2
match canceled, https://fbref.com/en/matches/a8ac15fe/Chateauroux-Valenciennes-April-10-2020-Ligue-2
match canceled, https://fbref.com/en/matches/6d29dff0/Valenciennes-Auxerre-April-24-2020-Ligue-2
match canceled, https://fbref.com/en/matches/7eb18c4b/Lens-Valenciennes-April-21-2020-Ligue-2
match canceled, https://fbref.com/en/matches/e754cc9e/Lorient-Orleans-April-24-2020-Ligue-2  
match canceled, https://fbref.com/en/matches/0d57a1d2/Guingamp-Niort-March-13-2020-Ligue-2   
match canceled, https://fbref.com/en/matches/2fb4636e/Le-Mans-Le-Havre-April-24-2020-Ligue-2 
match canceled, https://fbref.com/en/matches/7abbf758/Ajaccio-Troyes-May-8-2020-Ligue-2      
match canceled, https://fbref.com/en/matches/d548d6bb/Auxerre-Lorient-May-1-2020-Ligue-2     
match canceled, https://fbref.com/en/matches/c270961f/Orleans-Grenoble-Foot-March-13-2020-Ligue-2
match canceled, https://fbref.com/en/matches/4e0672dc/Troyes-Sochaux-April-21-2020-Ligue-2   
match canceled, https://fbref.com/en/matches/86182446/Ajaccio-Niort-April-24-2020-Ligue-2    
match canceled, https://fbref.com/en/matches/6c25ddb8/Nancy-Caen-April-24-2020-Ligue-2       
match canceled, https://fbref.com/en/matches/e67c9daa/Rodez-Aveyron-Niort-April-17-2020-Ligue-2
match canceled, https://fbref.com/en/matches/057edc26/Clermont-Foot-Guingamp-March-20-2020-Ligue-2
match canceled, https://fbref.com/en/matches/d9ba672e/Auxerre-Lens-March-14-2020-Ligue-2     
match canceled, https://fbref.com/en/matches/45e4cc38/Paris-FC-Chateauroux-March-13-2020-Ligue-2
match canceled, https://fbref.com/en/matches/8a9e5b11/Niort-Le-Mans-March-20-2020-Ligue-2    
match canceled, https://fbref.com/en/matches/605fdc57/Caen-Sochaux-May-15-2020-Ligue-2       
finished with 2019 for Ligue 2                                                               
starting {'All': {}, Serie B
finished with Serie B for {'All': {}                           
Gathering match links 2022, Serie B
starting Serie B, 2022
match canceled, https://fbref.com/en/matches/a6ce6043/Brescia-Cosenza-June-1-2023-Serie-B    
finished with 2022 for Serie B                                                               
Gathering match links 2019, Eredivisie
starting Eredivisie, 2019
match canceled, https://fbref.com/en/matches/cd79a296/Twente-Fortuna-Sittard-April-5-2020-Eredivisie
match canceled, https://fbref.com/en/matches/a32440d4/Zwolle-Twente-April-12-2020-Eredivisie 
match canceled, https://fbref.com/en/matches/700e202d/Fortuna-Sittard-PSV-Eindhoven-March-22-2020-Eredivisie
match canceled, https://fbref.com/en/matches/1363cecc/Heerenveen-RKC-Waalwijk-March-21-2020-Eredivisie
match canceled, https://fbref.com/en/matches/b1cbaddc/Fortuna-Sittard-Utrecht-April-12-2020-Eredivisie
match canceled, https://fbref.com/en/matches/1f417dd5/AZ-Alkmaar-Vitesse-April-4-2020-Eredivisie
match canceled, https://fbref.com/en/matches/2c3fb439/Heerenveen-Emmen-April-22-2020-Eredivisie
match canceled, https://fbref.com/en/matches/d39493be/AZ-Alkmaar-Utrecht-May-3-2020-Eredivisie
match canceled, https://fbref.com/en/matches/9c300370/Heracles-Almelo-Groningen-May-10-2020-Eredivisie
match canceled, https://fbref.com/en/matches/32c7f488/Zwolle-Heracles-Almelo-March-14-2020-Eredivisie
match canceled, https://fbref.com/en/matches/2a3179ea/Vitesse-RKC-Waalwijk-April-11-2020-Eredivisie
match canceled, https://fbref.com/en/matches/9be27ae4/Emmen-Ajax-April-12-2020-Eredivisie    
match canceled, https://fbref.com/en/matches/26429ec2/De-Klassieker-Feyenoord-Ajax-March-22-2020-Eredivisie
match canceled, https://fbref.com/en/matches/e2a21e33/ADO-Den-Haag-Feyenoord-April-22-2020-Eredivisie
match canceled, https://fbref.com/en/matches/84b2985a/Heracles-Almelo-Sparta-Rotterdam-March-20-2020-Eredivisie
match canceled, https://fbref.com/en/matches/08c8bd08/VVV-Venlo-ADO-Den-Haag-April-26-2020-Eredivisie
match canceled, https://fbref.com/en/matches/979c726a/RKC-Waalwijk-Feyenoord-April-5-2020-Eredivisie
match canceled, https://fbref.com/en/matches/b970fade/Ajax-Vitesse-April-23-2020-Eredivisie  
match canceled, https://fbref.com/en/matches/43bbed16/AZ-Alkmaar-Feyenoord-April-8-2020-Eredivisie
match canceled, https://fbref.com/en/matches/0d9233ef/Twente-VVV-Venlo-April-23-2020-Eredivisie
match canceled, https://fbref.com/en/matches/d4c08acf/PSV-Eindhoven-Emmen-March-14-2020-Eredivisie
match canceled, https://fbref.com/en/matches/2909c0b3/Willem-II-Heerenveen-March-14-2020-Eredivisie
match canceled, https://fbref.com/en/matches/5686b200/PSV-Eindhoven-Sparta-Rotterdam-April-12-2020-Eredivisie
match canceled, https://fbref.com/en/matches/b232d911/Fortuna-Sittard-AZ-Alkmaar-April-25-2020-Eredivisie
match canceled, https://fbref.com/en/matches/034174b3/Willem-II-Twente-May-10-2020-Eredivisie
match canceled, https://fbref.com/en/matches/c0c07037/Heerenveen-PSV-Eindhoven-May-3-2020-Eredivisie
match canceled, https://fbref.com/en/matches/d5ab0a54/Ajax-Twente-March-15-2020-Eredivisie   
match canceled, https://fbref.com/en/matches/36155374/Ajax-VVV-Venlo-May-3-2020-Eredivisie   
match canceled, https://fbref.com/en/matches/a5f77799/Fortuna-Sittard-Ajax-May-10-2020-Eredivisie
match canceled, https://fbref.com/en/matches/746b307e/PSV-Eindhoven-Utrecht-April-25-2020-Eredivisie
match canceled, https://fbref.com/en/matches/7a2c9d97/Heracles-Almelo-AZ-Alkmaar-April-11-2020-Eredivisie
match canceled, https://fbref.com/en/matches/d59fc411/Feyenoord-Groningen-April-25-2020-Eredivisie
match canceled, https://fbref.com/en/matches/e8677ffc/Utrecht-Vitesse-March-14-2020-Eredivisie
match canceled, https://fbref.com/en/matches/b9b52377/Sparta-Rotterdam-Feyenoord-March-15-2020-Eredivisie
match canceled, https://fbref.com/en/matches/7d023b29/Sparta-Rotterdam-Heerenveen-April-4-2020-Eredivisie
match canceled, https://fbref.com/en/matches/1a6b8477/Utrecht-Heracles-Almelo-April-22-2020-Eredivisie
match canceled, https://fbref.com/en/matches/205c7221/ADO-Den-Haag-Zwolle-May-10-2020-Eredivisie
match canceled, https://fbref.com/en/matches/6ca3f188/Willem-II-ADO-Den-Haag-April-11-2020-Eredivisie
match canceled, https://fbref.com/en/matches/3bbc8495/RKC-Waalwijk-Fortuna-Sittard-May-3-2020-Eredivisie
match canceled, https://fbref.com/en/matches/20a9cdc2/AZ-Alkmaar-PSV-Eindhoven-April-21-2020-Eredivisie
match canceled, https://fbref.com/en/matches/5047d9bc/Feyenoord-Vitesse-May-10-2020-Eredivisie
match canceled, https://fbref.com/en/matches/09445052/Ajax-Zwolle-April-5-2020-Eredivisie    
match canceled, https://fbref.com/en/matches/b7bd8625/Zwolle-VVV-Venlo-March-21-2020-Eredivisie
match canceled, https://fbref.com/en/matches/87b12cbb/Twente-ADO-Den-Haag-March-21-2020-Eredivisie
match canceled, https://fbref.com/en/matches/17388e4d/VVV-Venlo-Willem-II-April-4-2020-Eredivisie
match canceled, https://fbref.com/en/matches/bbd20c6e/Emmen-AZ-Alkmaar-May-10-2020-Eredivisie
match canceled, https://fbref.com/en/matches/1dd4751a/RKC-Waalwijk-Willem-II-April-22-2020-Eredivisie
match canceled, https://fbref.com/en/matches/488ad085/Heracles-Almelo-Twente-April-26-2020-Eredivisie
match canceled, https://fbref.com/en/matches/538029df/Utrecht-Groningen-April-4-2020-Eredivisie
match canceled, https://fbref.com/en/matches/80704664/Zwolle-Emmen-May-3-2020-Eredivisie     
match canceled, https://fbref.com/en/matches/ed5b4c1c/Utrecht-Heerenveen-May-10-2020-Eredivisie
match canceled, https://fbref.com/en/matches/52c0dbc6/Groningen-Heerenveen-April-10-2020-Eredivisie
match canceled, https://fbref.com/en/matches/96833911/Vitesse-Willem-II-March-21-2020-Eredivisie
match canceled, https://fbref.com/en/matches/97cb9cc7/RKC-Waalwijk-Groningen-March-15-2020-Eredivisie
match canceled, https://fbref.com/en/matches/44e3bdca/ADO-Den-Haag-Emmen-April-5-2020-Eredivisie
match canceled, https://fbref.com/en/matches/e4c3b865/Willem-II-Ajax-April-26-2020-Eredivisie
match canceled, https://fbref.com/en/matches/86facbaf/Vitesse-Sparta-Rotterdam-April-26-2020-Eredivisie
match canceled, https://fbref.com/en/matches/39f2f8f8/VVV-Venlo-AZ-Alkmaar-March-15-2020-Eredivisie
match canceled, https://fbref.com/en/matches/e7f1d278/Zwolle-Heerenveen-April-25-2020-Eredivisie
match canceled, https://fbref.com/en/matches/db400d3f/Vitesse-Heracles-Almelo-May-3-2020-Eredivisie
match canceled, https://fbref.com/en/matches/50dfc13f/Emmen-Utrecht-March-22-2020-Eredivisie 
match canceled, https://fbref.com/en/matches/0a8197f2/PSV-Eindhoven-Heracles-Almelo-April-4-2020-Eredivisie
match canceled, https://fbref.com/en/matches/4e3b5f58/Utrecht-Ajax-April-9-2020-Eredivisie   
match canceled, https://fbref.com/en/matches/4b282bb9/Feyenoord-VVV-Venlo-April-11-2020-Eredivisie
match canceled, https://fbref.com/en/matches/c8ce271d/VVV-Venlo-Sparta-Rotterdam-May-10-2020-Eredivisie
match canceled, https://fbref.com/en/matches/4cf846fb/Sparta-Rotterdam-Willem-II-May-3-2020-Eredivisie
match canceled, https://fbref.com/en/matches/846c03b5/Groningen-AZ-Alkmaar-March-22-2020-Eredivisie
match canceled, https://fbref.com/en/matches/c339b4a2/Emmen-RKC-Waalwijk-April-25-2020-Eredivisie
match canceled, https://fbref.com/en/matches/5216887f/ADO-Den-Haag-Fortuna-Sittard-March-13-2020-Eredivisie
match canceled, https://fbref.com/en/matches/777d95f8/PSV-Eindhoven-RKC-Waalwijk-May-10-2020-Eredivisie
match canceled, https://fbref.com/en/matches/67b1a464/Groningen-Fortuna-Sittard-April-22-2020-Eredivisie
match canceled, https://fbref.com/en/matches/b8d2edda/Groningen-ADO-Den-Haag-May-3-2020-Eredivisie
match canceled, https://fbref.com/en/matches/4a93d50b/Sparta-Rotterdam-Zwolle-April-21-2020-Eredivisie
match canceled, https://fbref.com/en/matches/cd64efd0/Twente-Feyenoord-May-3-2020-Eredivisie 
finished with 2019 for Eredivisie                                                            
Gathering match links 2019, Ligue 1
starting Ligue 1, 2019
match canceled, https://fbref.com/en/matches/f2512a78/Saint-Etienne-Dijon-May-23-2020-Ligue-1
match canceled, https://fbref.com/en/matches/f4fca137/Montpellier-Marseille-March-14-2020-Ligue-1
match canceled, https://fbref.com/en/matches/67588c5b/Brest-Montpellier-April-26-2020-Ligue-1
match canceled, https://fbref.com/en/matches/b18f296e/Angers-Toulouse-March-21-2020-Ligue-1  
match canceled, https://fbref.com/en/matches/a9969cbc/Monaco-Nantes-April-5-2020-Ligue-1     
match canceled, https://fbref.com/en/matches/a151d71c/Strasbourg-Brest-April-18-2020-Ligue-1 
match canceled, https://fbref.com/en/matches/d12d3ca0/Strasbourg-Marseille-May-2-2020-Ligue-1
match canceled, https://fbref.com/en/matches/1b327b14/Brest-Monaco-April-11-2020-Ligue-1     
match canceled, https://fbref.com/en/matches/1d845950/Lyon-Reims-March-13-2020-Ligue-1       
match canceled, https://fbref.com/en/matches/da07629a/Montpellier-Bordeaux-May-2-2020-Ligue-1
match canceled, https://fbref.com/en/matches/6af82534/Monaco-Toulouse-May-16-2020-Ligue-1    
match canceled, https://fbref.com/en/matches/d9f87fb2/Amiens-Nimes-April-18-2020-Ligue-1     
match canceled, https://fbref.com/en/matches/91b282fd/Marseille-Dijon-April-11-2020-Ligue-1  
match canceled, https://fbref.com/en/matches/826710b4/Lyon-Montpellier-May-9-2020-Ligue-1    
match canceled, https://fbref.com/en/matches/57c36729/Toulouse-Nantes-April-18-2020-Ligue-1  
match canceled, https://fbref.com/en/matches/f8ba7a02/Metz-Nice-May-9-2020-Ligue-1           
match canceled, https://fbref.com/en/matches/b2c790b4/Marseille-Metz-May-16-2020-Ligue-1     
match canceled, https://fbref.com/en/matches/1e73cccc/Dijon-Lyon-May-2-2020-Ligue-1          
match canceled, https://fbref.com/en/matches/09bd6051/Angers-Rennes-May-2-2020-Ligue-1       
match canceled, https://fbref.com/en/matches/c3399237/Angers-Lyon-May-16-2020-Ligue-1        
match canceled, https://fbref.com/en/matches/10b7bd4b/Nimes-Montpellier-April-11-2020-Ligue-1
match canceled, https://fbref.com/en/matches/6ae2dd0a/Reims-Marseille-May-23-2020-Ligue-1    
match canceled, https://fbref.com/en/matches/385c7fa5/Choc-des-Olympiques-Lyon-Marseille-April-18-2020-Ligue-1
match canceled, https://fbref.com/en/matches/2fe7fd6d/Paris-Saint-Germain-Brest-May-2-2020-Ligue-1
match canceled, https://fbref.com/en/matches/13141f17/Toulouse-Paris-Saint-Germain-May-9-2020-Ligue-1
match canceled, https://fbref.com/en/matches/1a829f24/Reims-Toulouse-April-11-2020-Ligue-1   
match canceled, https://fbref.com/en/matches/3cbe315e/Paris-Saint-Germain-Saint-Etienne-April-18-2020-Ligue-1
match canceled, https://fbref.com/en/matches/208e0770/Rennes-Strasbourg-April-26-2020-Ligue-1
match canceled, https://fbref.com/en/matches/a2b045b6/Toulouse-Metz-March-14-2020-Ligue-1    
match canceled, https://fbref.com/en/matches/7595ddd8/Marseille-Monaco-May-9-2020-Ligue-1    
match canceled, https://fbref.com/en/matches/c258110d/Bordeaux-Angers-May-9-2020-Ligue-1     
match canceled, https://fbref.com/en/matches/f4337de9/Rennes-Metz-April-18-2020-Ligue-1      
match canceled, https://fbref.com/en/matches/b39a4356/Brest-Lille-March-14-2020-Ligue-1      
match canceled, https://fbref.com/en/matches/8006241c/Saint-Etienne-Strasbourg-March-22-2020-Ligue-1
match canceled, https://fbref.com/en/matches/c9cadfbb/Montpellier-Reims-April-18-2020-Ligue-1
match canceled, https://fbref.com/en/matches/9e780bc9/Reims-Paris-Saint-Germain-April-26-2020-Ligue-1
match canceled, https://fbref.com/en/matches/84d5f5dc/Toulouse-Nimes-May-2-2020-Ligue-1      
match canceled, https://fbref.com/en/matches/cea88bc9/Brest-Nimes-May-16-2020-Ligue-1        
match canceled, https://fbref.com/en/matches/3dd2e74c/Nantes-Amiens-April-26-2020-Ligue-1    
match canceled, https://fbref.com/en/matches/65bc39f7/Strasbourg-Bordeaux-May-16-2020-Ligue-1
match canceled, https://fbref.com/en/matches/61deec67/Lille-Amiens-May-16-2020-Ligue-1       
match canceled, https://fbref.com/en/matches/66a90faf/Lyon-Nimes-April-5-2020-Ligue-1        
match canceled, https://fbref.com/en/matches/a9a4fa42/Metz-Lille-April-11-2020-Ligue-1       
match canceled, https://fbref.com/en/matches/65f39018/Montpellier-Nantes-May-16-2020-Ligue-1 
match canceled, https://fbref.com/en/matches/b4fb1a04/Nice-Strasbourg-April-11-2020-Ligue-1  
match canceled, https://fbref.com/en/matches/515c0e73/Bordeaux-Toulouse-April-26-2020-Ligue-1
match canceled, https://fbref.com/en/matches/718cf017/Nantes-Strasbourg-May-23-2020-Ligue-1  
match canceled, https://fbref.com/en/matches/7c5d60f4/Metz-Brest-March-21-2020-Ligue-1       
match canceled, https://fbref.com/en/matches/609ab72b/Lille-Reims-May-2-2020-Ligue-1         
match canceled, https://fbref.com/en/matches/8aa9d26a/Paris-Saint-Germain-Metz-April-5-2020-Ligue-1
match canceled, https://fbref.com/en/matches/e075de2d/Nimes-Paris-Saint-Germain-May-23-2020-Ligue-1
match canceled, https://fbref.com/en/matches/9574cfb7/Nantes-Brest-May-9-2020-Ligue-1        
match canceled, https://fbref.com/en/matches/f27a8e0e/Marseille-Nice-April-26-2020-Ligue-1   
match canceled, https://fbref.com/en/matches/57db75ba/Nantes-Nimes-March-14-2020-Ligue-1     
match canceled, https://fbref.com/en/matches/13c491ff/Rennes-Dijon-May-9-2020-Ligue-1        
match canceled, https://fbref.com/en/matches/d29312ea/Lille-Nice-April-18-2020-Ligue-1       
match canceled, https://fbref.com/en/matches/95d4d994/Bordeaux-Reims-April-5-2020-Ligue-1    
match canceled, https://fbref.com/en/matches/7db1787f/Marseille-Paris-Saint-Germain-March-22-2020-Ligue-1
match canceled, https://fbref.com/en/matches/16e073b9/Strasbourg-Paris-Saint-Germain-March-18-2020-Ligue-1
match canceled, https://fbref.com/en/matches/7c1066b8/Dijon-Amiens-March-21-2020-Ligue-1     
match canceled, https://fbref.com/en/matches/44a73b72/Saint-Etienne-Angers-April-26-2020-Ligue-1
match canceled, https://fbref.com/en/matches/8fcf6aa2/Toulouse-Montpellier-May-23-2020-Ligue-1
match canceled, https://fbref.com/en/matches/e0e0f972/Bordeaux-Lille-May-23-2020-Ligue-1     
match canceled, https://fbref.com/en/matches/cebe55f6/Amiens-Saint-Etienne-May-2-2020-Ligue-1
match canceled, https://fbref.com/en/matches/47ef0ef7/Nice-Saint-Etienne-May-16-2020-Ligue-1 
match canceled, https://fbref.com/en/matches/8e059c84/Toulouse-Saint-Etienne-April-4-2020-Ligue-1
match canceled, https://fbref.com/en/matches/b107c312/Brest-Marseille-April-5-2020-Ligue-1   
match canceled, https://fbref.com/en/matches/59d303b3/Lille-Monaco-March-20-2020-Ligue-1     
match canceled, https://fbref.com/en/matches/15af867b/Reims-Nantes-March-22-2020-Ligue-1     
match canceled, https://fbref.com/en/matches/4539502a/Angers-Paris-Saint-Germain-April-11-2020-Ligue-1
match canceled, https://fbref.com/en/matches/fc542da7/Reims-Amiens-May-9-2020-Ligue-1        
match canceled, https://fbref.com/en/matches/6da7fab9/Dijon-Angers-April-18-2020-Ligue-1     
match canceled, https://fbref.com/en/matches/93f7437a/Nice-Nantes-May-2-2020-Ligue-1         
match canceled, https://fbref.com/en/matches/dd0f8fc6/Lyon-Brest-May-23-2020-Ligue-1         
match canceled, https://fbref.com/en/matches/84b29f19/Bordeaux-Rennes-March-15-2020-Ligue-1  
match canceled, https://fbref.com/en/matches/98d0c3f5/Paris-Saint-Germain-Nice-March-15-2020-Ligue-1
match canceled, https://fbref.com/en/matches/30d69d30/Montpellier-Lille-April-4-2020-Ligue-1 
match canceled, https://fbref.com/en/matches/94873d92/Dijon-Nice-April-5-2020-Ligue-1        
match canceled, https://fbref.com/en/matches/5fa33756/Nimes-Strasbourg-May-9-2020-Ligue-1    
match canceled, https://fbref.com/en/matches/900d207e/Bordeaux-Amiens-April-11-2020-Ligue-1  
match canceled, https://fbref.com/en/matches/7024e6a9/Nantes-Lyon-April-11-2020-Ligue-1      
match canceled, https://fbref.com/en/matches/a248011a/Amiens-Nice-May-23-2020-Ligue-1        
match canceled, https://fbref.com/en/matches/dc3dc940/Lyon-Monaco-April-26-2020-Ligue-1      
match canceled, https://fbref.com/en/matches/7f9ca85d/Rennes-Monaco-May-23-2020-Ligue-1      
match canceled, https://fbref.com/en/matches/9a7e6ab4/Nimes-Bordeaux-March-21-2020-Ligue-1   
match canceled, https://fbref.com/en/matches/9a6f4349/Strasbourg-Dijon-March-14-2020-Ligue-1 
match canceled, https://fbref.com/en/matches/a6220762/Saint-Etienne-Rennes-April-11-2020-Ligue-1
match canceled, https://fbref.com/en/matches/cfc6d72b/Monaco-Saint-Etienne-March-15-2020-Ligue-1
match canceled, https://fbref.com/en/matches/0835d760/Saint-Etienne-Lille-May-9-2020-Ligue-1 
match canceled, https://fbref.com/en/matches/6f7a911b/Dijon-Reims-May-16-2020-Ligue-1        
match canceled, https://fbref.com/en/matches/aeffdfae/Paris-Saint-Germain-Rennes-May-16-2020-Ligue-1
match canceled, https://fbref.com/en/matches/39ea3e29/Nimes-Lille-April-26-2020-Ligue-1      
match canceled, https://fbref.com/en/matches/954c95bd/Rennes-Lyon-March-21-2020-Ligue-1      
match canceled, https://fbref.com/en/matches/d111b826/Amiens-Angers-March-14-2020-Ligue-1    
match canceled, https://fbref.com/en/matches/0cbe7254/Metz-Angers-May-23-2020-Ligue-1        
match canceled, https://fbref.com/en/matches/25b47edc/Monaco-Bordeaux-April-18-2020-Ligue-1  
match canceled, https://fbref.com/en/matches/0331f622/Strasbourg-Angers-April-5-2020-Ligue-1 
match canceled, https://fbref.com/en/matches/c6851d6c/Nice-Montpellier-March-21-2020-Ligue-1 
match canceled, https://fbref.com/en/matches/6921eaba/Metz-Dijon-April-26-2020-Ligue-1       
match canceled, https://fbref.com/en/matches/a2b504b7/Amiens-Rennes-April-5-2020-Ligue-1     
match canceled, https://fbref.com/en/matches/5959f855/Monaco-Metz-May-2-2020-Ligue-1         
finished with 2019 for Ligue 1                                                               
Gathering match links 2012, Serie A
starting Serie A, 2012
Exception Unexpected Error, 'NoneType' object has no attribute 'find'                        
failed attempt Serie A, 2012, https://fbref.com/en/matches/c5d02381/Napoli-Parma-September-16-2012-Serie-A
Exception Unexpected Error, 'NoneType' object has no attribute 'find'                        
failed attempt Serie A, 2012, https://fbref.com/en/matches/8f974e95/Cagliari-Genoa-January-13-2013-Serie-A
Exception Unexpected Error, 'NoneType' object has no attribute 'find'                        
failed attempt Serie A, 2012, https://fbref.com/en/matches/734ca415/Torino-Fiorentina-November-25-2012-Serie-A
Exception Unexpected Error, 'NoneType' object has no attribute 'find'                        
failed attempt Serie A, 2012, https://fbref.com/en/matches/00a51003/Fiorentina-Internazionale-February-17-2013-Serie-A
Exception Unexpected Error, 'NoneType' object has no attribute 'find'                        
failed attempt Serie A, 2012, https://fbref.com/en/matches/b449f479/Udinese-Napoli-February-25-2013-Serie-A
Exception Unexpected Error, 'NoneType' object has no attribute 'find'                        
failed 5 times, skipping Serie A, 2012
finished with 2012 for Serie A                           
Gathering match links 2013, Serie A
starting Serie A, 2013
finished with 2013 for Serie A                           
Gathering match links 2021, Serie A
starting Serie A, 2021
finished with 2021 for Serie A                           

    '''

SyntaxError: incomplete input (796969941.py, line 1)

In [None]:
# Directory that holds the scraped match pickles. It can be overridden with the
# SOCCER_AI_MATCHES_DIR environment variable so the notebook is not tied to one
# machine; when the variable is unset, the original location is used.
matches_dir = os.environ.get(
    'SOCCER_AI_MATCHES_DIR',
    'C:\\Users\\Alec\\Documents\\Python\\soccer-ai\\data_files\\matches',
)
# NOTE(review): pd.read_pickle unpickles arbitrary objects and must only be used on
# trusted files -- presumably this pickle was written by this project's scraper; confirm.
league_info = pd.read_pickle(os.path.join(matches_dir, 'Eredivisie_FBRef_scrape.pkl'))

In [None]:
# Count the non-null `stage` entries for each (league, year) pair.
(
    league_info
    .groupby(['league', 'year'])[['stage']]
    .count()
)