In [1]:
import pandas as pd
import numpy as np

pd.options.mode.chained_assignment = None
pd.set_option("display.precision", 3)
pd.set_option('future.no_silent_downcasting', True)

In [2]:
from football import midfielder
from football import main

# Columns that identify a player in every per-season frame.
name_columns = ['first_name', 'second_name']

# Start from the first tracked season, then outer-join each later season on
# the player's name so that a player missing from some season is still kept.
affordable_mids = midfielder.affordable_support_mids_for_year(main.start_year)
for season in range(main.start_year + 1, main.current_year + 1):
    season_mids = midfielder.affordable_support_mids_for_year(season)
    affordable_mids = affordable_mids.merge(
        season_mids,
        how='outer',
        left_on=name_columns,
        right_on=name_columns,
    )

# Keep only players with a value for BOTH the previous and the current season:
# this removes players who are unavailable now and newcomers whose single
# season would be an outlier.
last_year = main.current_year - 1
affordable_mids = affordable_mids.dropna(subset=[last_year, main.current_year])

# Run the availability helper on every row (it is expected to provide the
# "chance_to_play" column) and discard rows where that value is missing.
affordable_mids = affordable_mids.apply(
    main.chance_of_playing_this_round, axis=1
).dropna(subset=["chance_to_play"])

# Set aside players whose chance of playing is below 75 and remove them.
unavailable_mids = affordable_mids[affordable_mids["chance_to_play"] < 75]
affordable_mids = affordable_mids.drop(unavailable_mids.index)

# Rank by last season's value, best first, and display the resulting frame.
affordable_mids = affordable_mids.sort_values(by=last_year, ascending=False)
affordable_mids

Unnamed: 0,first_name,second_name,2020,2021,2022,2023,2024,chance_to_play
8,Adama,Traoré,0.002,9.355e-04,,0.008,0.004,100
437,Pedro,Lomba Neto,0.003,2.179e-03,0.002,0.007,0.004,100
223,Jacob,Murphy,0.002,2.745e-03,0.002,0.007,0.006,100
291,Jérémy,Doku,,,,0.006,0.000,100
133,Diogo,Teixeira da Silva,,,,0.006,0.008,100
...,...,...,...,...,...,...,...,...
491,Saša,Lukić,,,,0.000,0.000,100
525,Thomas,Partey,0.001,9.867e-04,0.000,0.000,0.000,100
526,Tim,Iroegbunam,,0.000e+00,,0.000,0.000,100
534,Tommy,Doyle,,,,0.000,0.000,100


In [3]:
# Estimate this season's value for every remaining player from their earlier
# seasons using the project's linear-prediction helper.
from football import sklearn

# The helper is applied row by row; the output relies on it providing the
# "prediction_linear" and "prediction_linear_score" columns.
# Rows are then ordered by the linear prediction, highest first.
predicted_mids = (
    affordable_mids.copy()
    .apply(sklearn.predict_this_year_linear, axis=1)
    .sort_values(by="prediction_linear", ascending=False)
)

# Disabled filter, kept for reference: drop rows whose linear score is below 0.
# predicted_mids.drop(predicted_mids[predicted_mids["prediction_linear_score"] < 0].index, inplace=True)
predicted_mids.head()

Unnamed: 0,first_name,second_name,2020,2021,2022,2023,2024,chance_to_play,prediction_linear,prediction_linear_score
305,Kevin,Schade,,,0.001,0.006,0.003,100,0.011,-3.93
143,Emile,Smith Rowe,0.003,0.001048,0.012,0.006,0.002,100,0.01,-0.472
8,Adama,Traoré,0.002,0.0009355,,0.008,0.004,100,0.01,-0.161
379,Mikkel,Damsgaard,,,0.0,0.005,0.0,100,0.01,-5.0
433,Pablo,Sarabia,,,0.0,0.004,0.0,100,0.008,-5.0


In [4]:
# Lowest "prediction_svr_score" a player may have and still be ranked.
# The previous comment claimed a cut-off of 0, but the code has always used -0.5.
MIN_SVR_SCORE = -0.5

# Add the SVR-based prediction to every row of the linear-prediction frame.
# The helper is applied row by row; the code below relies on it providing the
# "prediction_svr" and "prediction_svr_score" columns.
predicted_mids_svr = predicted_mids.apply(sklearn.predict_this_year_svr, axis=1)

# Drop players whose SVR score is below MIN_SVR_SCORE. A missing (NaN) score
# compares as False here, so such rows are kept.
poorly_scored = predicted_mids_svr[
    predicted_mids_svr["prediction_svr_score"] < MIN_SVR_SCORE
].index

# Disabled filter, kept for reference: also drop rows whose SVR prediction is exactly 0.
# predicted_mids_svr.drop(predicted_mids_svr[predicted_mids_svr["prediction_svr"] == 0].index, inplace=True)

# Build the filtered frame without mutating in place, ordered by the SVR
# prediction (highest first), so re-running the cell gives the same result.
predicted_mids_svr = predicted_mids_svr.drop(poorly_scored).sort_values(
    by="prediction_svr", ascending=False
)
predicted_mids_svr.head()

Unnamed: 0,first_name,second_name,2020,2021,2022,2023,2024,chance_to_play,prediction_linear,prediction_linear_score,prediction_svr,prediction_svr_score
143,Emile,Smith Rowe,0.003,0.001048,0.012,0.006,0.002,100,0.01,-0.472,0.007,-0.184
104,Cole,Palmer,,0.008264,0.003,0.005,0.008,100,0.002,-1.195,0.006,-0.042
437,Pedro,Lomba Neto,0.003,0.002179,0.002,0.007,0.004,100,0.007,0.07,0.005,-0.224
8,Adama,Traoré,0.002,0.0009355,,0.008,0.004,100,0.01,-0.161,0.005,-0.086
194,Harry,Wilson,,,0.005,0.004,0.0,100,0.004,-0.265,0.004,-0.499


# Good assisting midfielder

In [5]:
# Compare expected and actual assists for the current season. The helper is
# applied row by row and is expected to provide "overperformed_assists".
mid_data = predicted_mids_svr.copy().apply(
    midfielder.expected_vs_actual_assists, axis=1
)

# Drop players who perform much worse than expected (below -1). Rows with a
# missing "overperformed_assists" compare as False and are therefore kept.
# Afterwards remove any row that is empty in every column.
big_underperformers = mid_data[mid_data["overperformed_assists"] < -1].index
mid_data = mid_data.drop(big_underperformers).dropna(how='all')
mid_data

Unnamed: 0,first_name,second_name,2020,2021,2022,2023,2024,chance_to_play,prediction_linear,prediction_linear_score,prediction_svr,prediction_svr_score,overperformed_assists
143,Emile,Smith Rowe,0.003,1.048e-03,0.012,0.006,0.002,100,0.010,-0.472,0.007,-0.184,0.00
104,Cole,Palmer,,8.264e-03,0.003,0.005,0.008,100,0.002,-1.195,0.006,-0.042,-0.19
437,Pedro,Lomba Neto,0.003,2.179e-03,0.002,0.007,0.004,100,0.007,0.070,0.005,-0.224,-0.16
8,Adama,Traoré,0.002,9.355e-04,,0.008,0.004,100,0.010,-0.161,0.005,-0.086,
194,Harry,Wilson,,,0.005,0.004,0.000,100,0.004,-0.265,0.004,-0.499,-0.01
...,...,...,...,...,...,...,...,...,...,...,...,...,...
420,Oliver,Skipp,,0.000e+00,0.000,0.000,0.000,100,0.000,1.000,0.000,1.000,0.00
151,Ethan,Nwaneri,,,0.000,0.000,0.000,100,0.000,1.000,0.000,1.000,-0.16
491,Saša,Lukić,,,,0.000,0.000,100,0.000,1.000,0.000,1.000,
472,Ryan,Gravenberch,,,,0.000,0.000,100,0.000,1.000,0.000,1.000,-0.39


In [6]:
# Mix in player stats: fetch the current-year frame for player type "MID".
mids = main.player_type_for_year(main.current_year, player_type="MID")

# Attach those stats to the shortlisted players, matching on first and second
# name. A left join keeps every shortlisted player even without a stats match.
name_columns = ['first_name', 'second_name']
player_info = mid_data.merge(
    mids,
    how='left',
    left_on=name_columns,
    right_on=name_columns,
)

# Columns shown in the summary: identity, current-season value, both
# predictions, assist over-performance, then the raw player stats.
summary_columns = name_columns + [
    main.current_year,
    'prediction_linear',
    'prediction_svr',
    'overperformed_assists',
    'now_cost',
    'goals_conceded',
    'minutes',
    'assists',
    'total_points',
    'influence',
    'creativity',
    'bonus',
    'bps',
    'selected_by_percent',
]
player_custom_info = player_info[summary_columns]
player_custom_info.head()

Unnamed: 0,first_name,second_name,2024,prediction_linear,prediction_svr,overperformed_assists,now_cost,goals_conceded,minutes,assists,total_points,influence,creativity,bonus,bps,selected_by_percent
0,Emile,Smith Rowe,0.002,0.01,0.007,0.0,58,3,444,1,33,121.6,99.5,5,120,29.8
1,Cole,Palmer,0.008,0.002,0.006,-0.19,107,7,505,4,61,330.4,228.2,7,247,44.3
2,Pedro,Lomba Neto,0.004,0.007,0.005,-0.16,63,2,239,1,8,46.0,71.6,0,31,0.6
3,Adama,Traoré,0.004,0.01,0.005,,51,4,466,2,26,117.0,141.0,3,111,3.9
4,Harry,Wilson,0.0,0.004,0.004,-0.01,53,1,36,0,2,0.2,5.2,0,7,0.1
