In [1]:
import pandas as pd
import numpy as np

pd.options.mode.chained_assignment = None
pd.set_option("display.precision", 3)
pd.set_option('future.no_silent_downcasting', True)

In [2]:
from football import defender
from football import main

# Build one wide frame of defenders: start with the first season and
# outer-join every later season (up to and including the current one) on the
# player's name, so a player missing from a season simply gets NaN there.
name_keys = ['first_name', 'second_name']
affordable_defs = defender.affordable_support_defs_for_year(main.start_year)
for season in range(main.start_year + 1, main.current_year + 1):
    affordable_defs = affordable_defs.merge(
        defender.affordable_support_defs_for_year(season),
        how='outer',
        on=name_keys,
    )

# Keep only players that have a value for BOTH the previous and the current
# season. Per the original intent this removes players who are not available
# now, as well as newcomers whose short history would produce outlying results.
last_year = main.current_year - 1
affordable_defs = affordable_defs.dropna(subset=[last_year, main.current_year])

# Run main.chance_of_playing_this_round on every row, then discard rows that
# have no "chance_to_play" value afterwards.
affordable_defs = (
    affordable_defs
    .apply(main.chance_of_playing_this_round, axis=1)
    .dropna(subset=["chance_to_play"])
)

# Players with chance_to_play below 75 are treated as unavailable and removed
# (values look like percentages, 0-100 -- TODO confirm).
unavailable_defs = affordable_defs.loc[affordable_defs["chance_to_play"] < 75]
affordable_defs = affordable_defs.drop(index=unavailable_defs.index)

# Order by last season's column, highest first, and display the result.
affordable_defs = affordable_defs.sort_values(by=last_year, ascending=False)
affordable_defs

Unnamed: 0,first_name,second_name,2020,2021,2022,2023,2024,chance_to_play
214,Kieran,Trippier,,0.000e+00,0.003,0.004,0.000,100
216,Konstantinos,Tsimikas,0.000e+00,2.288e-03,0.005,0.004,0.000,100
64,Conor,Bradley,,,,0.004,0.000,100
249,Malo,Gusto,,,,0.003,0.000,100
157,Jakub,Kiwior,,,0.000,0.003,0.000,100
...,...,...,...,...,...,...,...,...
381,Tosin,Adarabioyo,3.386e-04,,0.000,0.000,0.000,100
332,Rico,Lewis,,,0.000,0.000,0.003,100
378,Tino,Livramento,,9.091e-04,0.000,0.000,0.000,100
350,Santiago,Bueno,,,,0.000,0.000,100


In [3]:
# Predict this year's stat for every row (player) from the previous seasons.
# NOTE: `sklearn` here is the project module football.sklearn; the name is the
# same as scikit-learn's top-level package.
from football import sklearn

predicted_defs = affordable_defs.copy()
predicted_defs = predicted_defs.apply(sklearn.predict_this_year_linear, axis=1)

# The filter on the linear prediction score is currently disabled; the
# disabled code would drop rows whose score is below 0.
# predicted_defs.drop(predicted_defs[predicted_defs["prediction_linear_score"] < 0].index, inplace=True)

# Highest linear prediction first; show the top rows.
predicted_defs = predicted_defs.sort_values(by="prediction_linear", ascending=False)
predicted_defs.head()

Unnamed: 0,first_name,second_name,2020,2021,2022,2023,2024,chance_to_play,prediction_linear,prediction_linear_score
216,Konstantinos,Tsimikas,0.0,0.002,0.005,0.004,0.0,100,0.007,-1.239
214,Kieran,Trippier,,0.0,0.003,0.004,0.0,100,0.007,-2.274
157,Jakub,Kiwior,,,0.0,0.003,0.0,100,0.006,-5.0
256,Marcos,Senesi,,,0.0,0.002,0.0,100,0.004,-5.0
136,Harry,Toffolo,,,0.0,0.002,0.0,100,0.004,-5.0


In [4]:
# Run sklearn.predict_this_year_svr on every row of the linear-prediction frame.
svr_scored = predicted_defs.apply(sklearn.predict_this_year_svr, axis=1)

# Discard rows whose SVR prediction score is below -0.5
# (a score of exactly -0.5 is kept).
poorly_scored = svr_scored.loc[svr_scored["prediction_svr_score"] < -0.5]
predicted_defs_svr = svr_scored.drop(index=poorly_scored.index)

# Filtering out rows with a zero SVR prediction is currently disabled.
# predicted_defs_svr.drop(predicted_defs_svr[predicted_defs_svr["prediction_svr"] == 0].index, inplace=True)

# Highest SVR prediction first; show the top rows.
predicted_defs_svr = predicted_defs_svr.sort_values(by="prediction_svr", ascending=False)
predicted_defs_svr.head()

Unnamed: 0,first_name,second_name,2020,2021,2022,2023,2024,chance_to_play,prediction_linear,prediction_linear_score,prediction_svr,prediction_svr_score
383,Trent,Alexander-Arnold,0.003,0.004,0.004,0.002,0.002,100,0.002884,0.148,0.003,-0.103
260,Matt,Doherty,0.002,0.006,0.0,0.0,0.0,100,-0.0008078,0.309,0.003,-0.396
19,Andrew,Robertson,0.002,0.004,0.003,0.001,0.0,100,0.001553,0.422,0.003,-0.14
216,Konstantinos,Tsimikas,0.0,0.002,0.005,0.004,0.0,100,0.007069,-1.239,0.003,-0.011
311,Pedro,Porro,,,0.003,0.003,0.0,100,0.002528,-0.396,0.003,-0.5


# Good assisting defender

In [5]:
# Run defender.expected_vs_actual_assists on every row of a copy of the SVR
# frame; the filter below relies on the resulting "overperformed_assists" column.
def_data = predicted_defs_svr.copy().apply(defender.expected_vs_actual_assists, axis=1)

# Drop players who perform much worse than expected (overperformed_assists
# below -1; exactly -1 is kept), then remove rows that are entirely empty.
underperforming = def_data.loc[def_data["overperformed_assists"] < -1]
def_data = def_data.drop(index=underperforming.index).dropna(how='all')
def_data

Unnamed: 0,first_name,second_name,2020,2021,2022,2023,2024,chance_to_play,prediction_linear,prediction_linear_score,prediction_svr,prediction_svr_score,overperformed_assists
383,Trent,Alexander-Arnold,0.003,0.004,0.004,0.002,0.002,100,2.884e-03,0.148,0.003,-0.103,-1.00
260,Matt,Doherty,0.002,0.006,0.000,0.000,0.000,100,-8.078e-04,0.309,0.003,-0.396,-0.01
19,Andrew,Robertson,0.002,0.004,0.003,0.001,0.000,100,1.553e-03,0.422,0.003,-0.140,-0.13
216,Konstantinos,Tsimikas,0.000,0.002,0.005,0.004,0.000,100,7.069e-03,-1.239,0.003,-0.011,-0.01
311,Pedro,Porro,,,0.003,0.003,0.000,100,2.528e-03,-0.396,0.003,-0.500,-0.32
...,...,...,...,...,...,...,...,...,...,...,...,...,...
215,Konstantinos,Mavropanos,,,,0.000,0.000,100,0.000e+00,1.000,0.000,1.000,0.00
28,Axel,Disasi,,,,0.000,0.000,100,0.000e+00,1.000,0.000,1.000,0.00
48,Calvin,Bassey,,,,0.000,0.000,100,0.000e+00,1.000,0.000,1.000,-0.03
309,Pau,Torres,,,,0.000,0.000,100,0.000e+00,1.000,0.000,1.000,-0.01


In [6]:
# Attach the current season's player stats to the shortlisted defenders.
# Left join on the player's name: every row of def_data is kept, whether or
# not a matching row exists in the current-season data.
defs = main.player_type_for_year(main.current_year, player_type="DEF")
player_info = def_data.merge(
    defs,
    how='left',
    on=['first_name', 'second_name'],
)

# Columns shown in the summary: identity, current-season value, both
# predictions, assist over/under-performance, then the merged-in season stats.
summary_columns = [
    'first_name',
    'second_name',
    main.current_year,
    'prediction_linear',
    'prediction_svr',
    'overperformed_assists',
    'now_cost',
    'goals_conceded',
    'minutes',
    'assists',
    'total_points',
    'influence',
    'creativity',
    'bonus',
    'bps',
    'selected_by_percent',
]
player_custom_info = player_info[summary_columns]
player_custom_info.head()

Unnamed: 0,first_name,second_name,2024,prediction_linear,prediction_svr,overperformed_assists,now_cost,goals_conceded,minutes,assists,total_points,influence,creativity,bonus,bps,selected_by_percent
0,Trent,Alexander-Arnold,0.002,0.002884,0.003,-1.0,71,2,492,1,33,105.4,199.9,4,122,32.1
1,Matt,Doherty,0.0,-0.0008078,0.003,-0.01,44,9,208,0,2,40.8,16.0,0,2,0.2
2,Andrew,Robertson,0.0,0.001553,0.003,-0.13,60,2,502,0,27,68.4,158.2,0,131,7.2
3,Konstantinos,Tsimikas,0.0,0.007069,0.003,-0.01,47,0,33,0,3,6.6,2.1,0,11,0.2
4,Pedro,Porro,0.0,0.002528,0.003,-0.32,55,5,537,0,25,165.6,189.4,1,116,29.8
