In [1]:
import pandas as pd
import numpy as np

pd.options.mode.chained_assignment = None
pd.set_option("display.precision", 3)
pd.set_option('future.no_silent_downcasting', True)

In [2]:
from football import defender
from football import main

# Seed the table with the first season, then outer-join every later season
# onto it by player name so each player accumulates one column per season.
affordable_defs = defender.affordable_defs_for_year(main.start_year)
for season in range(main.start_year + 1, main.current_year + 1):
    season_defs = defender.affordable_defs_for_year(season)
    affordable_defs = pd.merge(
        affordable_defs,
        season_defs,
        how="outer",
        on=["first_name", "second_name"],
    )

# Keep only players with a value for BOTH last season and the current one:
#   - a missing current season means the player is not available now;
#   - a missing last season means a newcomer whose numbers may be outliers.
last_year = main.current_year - 1
affordable_defs = affordable_defs.dropna(subset=[last_year, main.current_year])

# Row-wise availability lookup; rows without a `chance_to_play` value are dropped.
affordable_defs = affordable_defs.apply(main.chance_of_playing_this_round, axis=1)
affordable_defs = affordable_defs.dropna(subset=["chance_to_play"])

# Players with less than a 75% chance of playing are set aside and removed.
unavailable_defs = affordable_defs[affordable_defs["chance_to_play"] < 75]
affordable_defs = affordable_defs.drop(index=unavailable_defs.index)

# Rank by last season's value, best first, and display the result.
affordable_defs = affordable_defs.sort_values(by=last_year, ascending=False)
affordable_defs

Unnamed: 0,first_name,second_name,2020,2021,2022,2023,2024,chance_to_play
377,William,Saliba,,,96.600,117.931,108.000,100
176,John,Stones,161.083,186.167,108.471,117.889,135.000,100
148,Jakub,Kiwior,,,70.833,117.750,16.000,100
117,Gabriel,dos Santos MagalhÃ£es,,,79.279,117.000,108.000,100
39,Benjamin,White,,,87.257,114.885,202.500,100
...,...,...,...,...,...,...,...,...
375,VladimÃ­r,Coufal,,,,44.786,66.000,100
196,Kenny,Tete,54.704,,71.361,41.095,108.000,100
76,Dara,O'Shea,41.157,,,40.732,90.000,100
34,Ben,Johnson,51.600,77.611,66.154,33.625,21.833,100


In [3]:
# Predict each player's current-season stat from previous seasons.
# NOTE: `sklearn` below is the project's own `football.sklearn` module.
from football import sklearn

# Optional filter (disabled): drop rows whose linear prediction score is below 0.
# predicted_defs.drop(predicted_defs[predicted_defs["prediction_linear_score"] < 0].index, inplace=True)

# Work on a copy of the shortlist, apply the linear predictor row by row,
# then order by the predicted value, highest first.
predicted_defs = (
    affordable_defs.copy()
    .apply(sklearn.predict_this_year_linear, axis=1)
    .sort_values(by="prediction_linear", ascending=False)
)
predicted_defs.head()

Unnamed: 0,first_name,second_name,2020,2021,2022,2023,2024,chance_to_play,prediction_linear,prediction_linear_score
148,Jakub,Kiwior,,,70.833,117.75,16.0,100,164.667,-3.261
117,Gabriel,dos Santos MagalhÃ£es,,,79.279,117.0,108.0,100,154.721,-1.812
39,Benjamin,White,,,87.257,114.885,202.5,100,142.512,0.503
377,William,Saliba,,,96.6,117.931,108.0,100,139.262,-3.289
238,Manuel,Akanji,,,99.304,109.217,90.0,100,119.13,-3.594


In [5]:
# Optional filter (disabled): drop rows whose SVR prediction score is below 0.
# predicted_defs_svr.drop(predicted_defs_svr[predicted_defs_svr["prediction_svr_score"] < 0].index, inplace=True)

# Apply the SVR predictor row by row on top of the linear predictions,
# then order by the SVR-predicted value, highest first.
predicted_defs_svr = (
    predicted_defs
    .apply(sklearn.predict_this_year_svr, axis=1)
    .sort_values(by="prediction_svr", ascending=False)
)
predicted_defs_svr.head()

Unnamed: 0,first_name,second_name,2020,2021,2022,2023,2024,chance_to_play,prediction_linear,prediction_linear_score,prediction_svr,prediction_svr_score
176,John,Stones,161.083,186.167,108.471,117.889,135.0,100,91.583,0.079,138.997,0.06
325,RÃºben,Gato Alves Dias,,,124.812,91.321,91.2,100,57.83,-0.484,108.049,-0.04
62,Conor,Bradley,,,,107.429,75.0,100,107.429,-1.0,107.429,-1.0
377,William,Saliba,,,96.6,117.931,108.0,100,139.262,-3.289,107.284,0.175
238,Manuel,Akanji,,,99.304,109.217,90.0,100,119.13,-3.594,104.279,-0.275


# Good holding defender

In [6]:
# Pull current-season stats for every shortlisted player (on a copy, so the
# prediction table itself is left alone) to measure performance.
def_data = predicted_defs_svr.copy().apply(
    defender.expected_vs_actual_goals_conceded, axis=1
)

# Drop players who perform much worse than expected (`overperformed_goals`
# below -1), then discard any rows that are entirely empty.
def_data = def_data.drop(
    index=def_data.loc[def_data["overperformed_goals"] < -1].index
).dropna(how="all")
def_data

Unnamed: 0,first_name,second_name,2020,2021,2022,2023,2024,chance_to_play,prediction_linear,prediction_linear_score,prediction_svr,prediction_svr_score,overperformed_goals
176,John,Stones,161.083,186.167,108.471,117.889,135.000,100,91.583,0.079,138.997,0.060,0.06
325,RÃºben,Gato Alves Dias,,,124.812,91.321,91.200,100,57.830,-0.484,108.049,-0.040,
62,Conor,Bradley,,,,107.429,75.000,100,107.429,-1.000,107.429,-1.000,0.04
377,William,Saliba,,,96.600,117.931,108.000,100,139.262,-3.289,107.284,0.175,0.47
238,Manuel,Akanji,,,99.304,109.217,90.000,100,119.130,-3.594,104.279,-0.275,1.01
...,...,...,...,...,...,...,...,...,...,...,...,...,...
243,Marcos,Senesi,,,45.255,50.886,69.200,100,56.518,0.487,48.089,-0.443,-0.40
161,Jarrad,Branthwaite,,22.765,,70.818,90.000,75,94.845,0.990,47.159,-0.208,-0.05
92,Emerson,Palmieri dos Santos,,,47.036,46.836,46.778,100,46.636,0.451,46.936,-0.227,0.03
330,Santiago,Bueno,,,,45.500,21.000,100,45.500,-1.000,45.500,-1.000,0.39


In [7]:
# Enrich the shortlist with the current season's defender stats,
# matching on player name and keeping every shortlisted row (left join).
defs = main.player_type_for_year(main.current_year, player_type="DEF")
player_info = pd.merge(
    def_data,
    defs,
    how="left",
    on=["first_name", "second_name"],
)

# Narrow the merged table down to the columns used for comparison.
player_custom_info = player_info[
    [
        "first_name",
        "second_name",
        main.current_year,
        "prediction_linear",
        "prediction_svr",
        "overperformed_goals",
        "now_cost",
        "goals_conceded",
        "minutes",
        "assists",
        "total_points",
        "influence",
        "creativity",
        "bonus",
        "bps",
        "selected_by_percent",
    ]
]
player_custom_info.head()

Unnamed: 0,first_name,second_name,2024,prediction_linear,prediction_svr,overperformed_goals,now_cost,goals_conceded,minutes,assists,total_points,influence,creativity,bonus,bps,selected_by_percent
0,John,Stones,135.0,91.583,138.997,0.06,53,1,135,0,9,39.0,9.3,0,24,1.7
1,RÃºben,Gato Alves Dias,91.2,57.83,108.049,,55,5,456,0,9,79.4,61.1,0,52,4.0
2,Conor,Bradley,75.0,107.429,107.429,0.04,48,1,75,0,4,17.8,6.5,0,11,0.3
3,William,Saliba,108.0,139.262,107.284,0.47,60,5,540,0,20,106.0,5.6,0,93,33.3
4,Manuel,Akanji,90.0,119.13,104.279,1.01,55,6,540,0,14,109.8,122.4,0,83,5.1
