In [1]:
import pandas as pd
import numpy as np

pd.options.mode.chained_assignment = None
pd.set_option("display.precision", 3)
pd.set_option('future.no_silent_downcasting', True)

In [2]:
from football import midfielder
from football import main

# Build one wide frame of affordable midfielders: rows are matched across
# seasons on (first_name, second_name) and every season contributes its own
# column(s).
name_keys = ['first_name', 'second_name']
affordable_mids = midfielder.affordable_mids_for_year(main.start_year)

for season in range(main.start_year + 1, main.current_year + 1):
    season_df = midfielder.affordable_mids_for_year(season)
    # Outer join: a player missing from some seasons is kept, with NaN there.
    affordable_mids = pd.merge(
        affordable_mids,
        season_df,
        how='outer',
        on=name_keys,
    )

# Keep only players that have a value in BOTH of the two most recent seasons.
# This filters out players who are unavailable now, and newcomers with
# outlying results.
last_year = main.current_year - 1
affordable_mids = affordable_mids.dropna(subset=[last_year, main.current_year])

# Run the availability helper on every row, then discard rows where
# chance_to_play is missing.
affordable_mids = affordable_mids.apply(main.chance_of_playing_this_round, axis=1)
affordable_mids = affordable_mids.dropna(subset=["chance_to_play"])

# Remove players whose chance_to_play is below 75 (a value of exactly 75 stays).
unavailable_mids = affordable_mids[affordable_mids["chance_to_play"] < 75]
affordable_mids = affordable_mids.drop(index=unavailable_mids.index)

# Order by last season's value, highest first, and display the result.
affordable_mids = affordable_mids.sort_values(by=last_year, ascending=False)
affordable_mids

Unnamed: 0,first_name,second_name,2020,2021,2022,2023,2024,chance_to_play
397,Reiss,Nelson,,19.000,67.333,256.000,63.000,100
269,Leandro,Trossard,86.733,84.939,58.868,163.300,62.800,100
459,Thomas,Partey,84.944,81.080,88.571,157.000,106.800,100
107,Declan,Rice,77.838,64.857,65.440,115.179,80.400,100
123,Emile,Smith Rowe,143.800,68.179,32.200,114.333,148.000,100
...,...,...,...,...,...,...,...,...
284,Luis,Sinisterra,,,,38.056,49.667,100
325,Maxwel,Cornet,,73.500,46.400,36.333,17.750,100
255,Kalvin,Phillips,71.412,63.760,97.000,30.308,75.667,75
8,Adama,Traoré,64.390,106.900,,27.923,116.500,100


In [3]:
# Predict stat using previous data
from football import sklearn
# Apply the linear predictor to every row of a copy of affordable_mids, then
# order the result by the predicted value, highest first.
predicted_mids = (
    affordable_mids
    .copy()
    .apply(sklearn.predict_this_year_linear, axis=1)
    .sort_values(by="prediction_linear", ascending=False)
)

# Disabled filter, kept for reference: drop rows whose linear score is below 0.
# predicted_mids.drop(predicted_mids[predicted_mids["prediction_linear_score"] < 0].index, inplace=True)
predicted_mids.head()

Unnamed: 0,first_name,second_name,2020,2021,2022,2023,2024,chance_to_play,prediction_linear,prediction_linear_score
397,Reiss,Nelson,,19.0,67.333,256.0,63.0,100,351.111,-1.589
459,Thomas,Partey,84.944,81.08,88.571,157.0,106.8,100,158.813,-0.049
269,Leandro,Trossard,86.733,84.939,58.868,163.3,62.8,100,149.368,-0.619
381,Pape Matar,Sarr,,,19.182,76.407,100.0,100,133.633,0.673
37,Anthony,Elanga,77.5,71.059,208.5,51.574,88.0,100,117.074,-0.032


In [4]:
# Apply the SVR predictor to every row of predicted_mids, then order the
# result by the SVR prediction, highest first.
predicted_mids_svr = (
    predicted_mids
    .apply(sklearn.predict_this_year_svr, axis=1)
    .sort_values(by="prediction_svr", ascending=False)
)

# Disabled filter, kept for reference: drop rows whose SVR score is below 0.
# NOTE(review): if re-enabled it should act on predicted_mids_svr, the frame
# built in this cell.
# predicted_mids_svr.drop(predicted_mids_svr[predicted_mids_svr["prediction_svr_score"] < 0].index, inplace=True)
predicted_mids_svr.head()

Unnamed: 0,first_name,second_name,2020,2021,2022,2023,2024,chance_to_play,prediction_linear,prediction_linear_score,prediction_svr,prediction_svr_score
71,Callum,Hudson-Odoi,210.0,119.75,,45.024,133.0,100,-15.004,-0.658,118.775,0.006
312,Mateo,Kovačić,,,,109.929,72.167,100,109.929,-1.0,109.929,-1.0
114,Diogo,Teixeira da Silva,,,,103.727,373.0,100,103.727,-1.0,103.727,-1.0
187,Jack,Grealish,77.964,238.75,128.188,76.769,88.333,100,101.881,0.1,102.444,-0.078
54,Bernardo,Veiga de Carvalho e Silva,,,91.5,99.077,91.2,100,106.654,-4.993,95.307,0.181


# Good holding midfielder

In [5]:
# Get current-season stats to calculate performance: run the
# expected-vs-actual goals-conceded helper on every row of a copy of the
# SVR predictions.
mid_data = predicted_mids_svr.copy().apply(
    midfielder.expected_vs_actual_goals_conceded, axis=1
)

# Drop players who perform much worse than expected (overperformed_goals
# below -1). Rows where the value is NaN do not satisfy the comparison and
# are therefore kept.
much_worse = mid_data.loc[mid_data["overperformed_goals"] < -1].index
mid_data = mid_data.drop(index=much_worse)

# Discard rows in which every column is NaN.
mid_data = mid_data.dropna(how='all')
mid_data

Unnamed: 0,first_name,second_name,2020,2021,2022,2023,2024,chance_to_play,prediction_linear,prediction_linear_score,prediction_svr,prediction_svr_score,overperformed_goals
71,Callum,Hudson-Odoi,210.000,119.75,,45.024,133.000,100,-15.004,-0.658,118.775,0.006,0.63
312,Mateo,Kovačić,,,,109.929,72.167,100,109.929,-1.000,109.929,-1.000,
114,Diogo,Teixeira da Silva,,,,103.727,373.000,100,103.727,-1.000,103.727,-1.000,0.41
187,Jack,Grealish,77.964,238.75,128.188,76.769,88.333,100,101.881,0.100,102.444,-0.078,0.57
54,Bernardo,Veiga de Carvalho e Silva,,,91.500,99.077,91.200,100,106.654,-4.993,95.307,0.181,1.01
...,...,...,...,...,...,...,...,...,...,...,...,...,...
246,João Victor,Gomes da Silva,,,36.056,51.882,35.214,100,67.709,-4.988,43.987,0.019,
337,Mohammed,Kudus,,,,40.738,49.200,100,40.738,-1.000,40.738,-1.000,0.03
16,Alex,Scott,,,,40.120,56.333,100,40.120,-1.000,40.120,-1.000,0.02
212,Jean-Ricner,Bellegarde,,,,39.500,48.625,100,39.500,-1.000,39.500,-1.000,-0.01


In [6]:
# Mix in the current-season player stats: left-join them onto mid_data by
# player name, so every row of mid_data is kept.
mids = main.player_type_for_year(main.current_year, player_type="MID")
player_info = pd.merge(
    mid_data,
    mids,
    how='left',
    on=['first_name', 'second_name'],
)

# Columns shown in the summary: identity, current-season value, the two
# predictions, the over-performance figure, then the raw player stats.
summary_columns = [
    'first_name',
    'second_name',
    main.current_year,
    'prediction_linear',
    'prediction_svr',
    'overperformed_goals',
    'now_cost',
    'goals_conceded',
    'minutes',
    'assists',
    'total_points',
    'influence',
    'creativity',
    'bonus',
    'bps',
    'selected_by_percent',
]
player_custom_info = player_info[summary_columns]
player_custom_info.head()

Unnamed: 0,first_name,second_name,2024,prediction_linear,prediction_svr,overperformed_goals,now_cost,goals_conceded,minutes,assists,total_points,influence,creativity,bonus,bps,selected_by_percent
0,Callum,Hudson-Odoi,133.0,-15.004,118.775,0.63,54,3,399,1,20,82.8,127.8,1,94,2.4
1,Mateo,Kovačić,72.167,109.929,109.929,,55,6,433,1,21,107.6,136.4,4,113,1.6
2,Diogo,Teixeira da Silva,373.0,103.727,103.727,0.41,75,1,373,3,26,75.4,50.0,1,64,9.0
3,Jack,Grealish,88.333,101.881,102.444,0.57,64,3,265,1,12,58.6,137.5,3,69,0.3
4,Bernardo,Veiga de Carvalho e Silva,91.2,106.654,95.307,1.01,65,5,456,2,19,79.2,181.5,2,91,5.1
