In [1]:
import pandas as pd
import numpy as np

# Notebook-wide pandas settings.
# Disable the chained-assignment check (no SettingWithCopyWarning is emitted).
pd.set_option("mode.chained_assignment", None)
# Render floats with one decimal place in displayed frames.
pd.options.display.precision = 1


def getMinutesPerConceded(row: pd.Series) -> float:
  """Return the minutes played per goal conceded for a single player row.

  Args:
    row: A row (or any mapping) exposing "minutes" and "goals_conceded".

  Returns:
    ``minutes / goals_conceded`` as a scalar, or NaN when no goals were
    conceded, so the undefined ratio can be dropped by the caller instead of
    triggering a division by zero.
  """
  conceded = row["goals_conceded"]
  if conceded == 0:
    return np.nan
  return row["minutes"] / conceded

def affordable_gks_for_year(folderlabel: str) -> pd.DataFrame:
  """Rank one season's goalkeepers by minutes played per goal conceded.

  Reads ``data/{folderlabel}/cleaned_players.csv``, keeps rows whose
  ``element_type`` is "GK" and whose ``minutes`` is non-zero, and computes
  ``minutes / goals_conceded`` for each of them. Keepers with zero goals
  conceded have no defined ratio and are dropped.

  NOTE(review): despite the name, no price/cost column is read here, so no
  affordability filter is applied.

  Args:
    folderlabel: Season folder name; its leading year (as parsed by
      ``year_from_foldername``) becomes the name of the ratio column.

  Returns:
    A frame with columns ``first_name``, ``second_name`` and the integer
    season year, sorted by the ratio in descending order.
  """
  players = pd.read_csv(f"data/{folderlabel}/cleaned_players.csv", encoding="iso-8859-1")
  year = year_from_foldername(folderlabel)

  # Build both conditions from the same frame so the mask and the rows it
  # selects always share one index.
  is_played_gk = (players["element_type"] == "GK") & (players["minutes"] != 0)
  played_gks = players[is_played_gk]

  # Vectorised ratio: blank out zero denominators first so they yield NaN
  # rather than inf, then drop those rows below.
  conceded = played_gks["goals_conceded"]
  minutes_per_conceded = played_gks["minutes"] / conceded.where(conceded != 0)

  # Work on an explicit copy so adding the column never writes into a slice
  # of the frame that was read from disk.
  ranked = played_gks[["first_name", "second_name"]].copy()
  ranked[year] = minutes_per_conceded
  return ranked.dropna(subset=[year]).sort_values(by=year, ascending=False)

def foldername_from_year(year: int) -> str:
  """Build the season folder label for the season starting in ``year``.

  The label is the four-digit starting year, a dash, and the last two digits
  of the following year, e.g. 2020 -> "2020-21".

  The suffix is taken modulo 100 and zero-padded so that years outside
  2009-2098 still produce a two-digit suffix (2008 -> "2008-09",
  1999 -> "1999-00") instead of "2008-9" or a negative number.
  NOTE(review): assumes the data folders use the zero-padded form — confirm
  if seasons before 2009 are ever loaded.

  Args:
    year: Calendar year in which the season starts.

  Returns:
    The folder label in "YYYY-YY" form.
  """
  next_year_suffix = (year + 1) % 100
  return f"{year}-{next_year_suffix:02d}"

def year_from_foldername(foldername: str) -> int:
  """Return the integer that precedes the first "-" in ``foldername``.

  For a label such as "2020-21" this is the season's starting year, 2020.
  A label without a dash is converted as a whole.
  """
  leading_part, _, _ = foldername.partition("-")
  return int(leading_part)

In [2]:
start_year = 2020
last_year = 2024

# Start from the first season, then outer-join every later season on the
# player's name so each season contributes one ratio column.
df = affordable_gks_for_year(foldername_from_year(start_year))
for season_start in range(start_year + 1, last_year + 1):
  season_gks = affordable_gks_for_year(foldername_from_year(season_start))
  df = df.merge(season_gks, how='outer', on=['first_name', 'second_name'])

# Keep only keepers that have a value in BOTH of the two most recent seasons:
# this removes players with no value for the latest season and newcomers
# whose single season would give outlying results.
previous_year = last_year - 1
df = df.dropna(subset=[previous_year, last_year])

# Any remaining gaps (earlier seasons without a value) are shown as 0.
df = df.fillna(0)

# Show the top keepers ranked by the previous season, with one decimal place.
# head() returns the first 5 rows by default.
pd.set_option("display.precision", 1)
df = df.sort_values(by=previous_year, ascending=False)
df.head()

Unnamed: 0,first_name,second_name,2020,2021,2022,2023,2024
22,David,Raya Martin,0.0,80.0,74.3,120.0,108.0
0,Aaron,Ramsdale,54.3,78.5,79.5,108.0,36.0
25,Ederson,Santana de Moraes,115.7,128.1,98.4,103.1,90.0
62,Nick,Pope,77.8,68.9,101.9,84.1,77.1
3,Alisson,Ramses Becker,92.8,135.0,77.4,84.0,225.0
