In [2]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import warnings
warnings.filterwarnings('ignore')

import os

# Print the full path of every file found under the Kaggle input directory,
# so the dataset locations are visible before anything is loaded.
input_files = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

/kaggle/input/epl-player-stats/england-premier-league-players-2010-2023-stats.csv
/kaggle/input/epl-player-stats/epl_without_230_columns.xlsx


In [2]:
# Load the EPL player stats CSV. If the file carries an "Unnamed: 0" column
# (typically a leftover saved index), drop it; errors="ignore" makes the drop
# a no-op when the column is absent.
csv_path = '/kaggle/input/epl-player-stats/england-premier-league-players-2010-2023-stats.csv'
player_df = pd.read_csv(csv_path)
player_df = player_df.drop(columns=["Unnamed: 0"], errors="ignore")
player_df.shape

(8112, 269)

In [3]:
# Percentage of null values in each column of player_df.
n_rows = player_df.shape[0]
missing = player_df.isnull().sum() * 100 / n_rows

# Number of columns that are more than 70% empty.
int((missing > 70).sum())

222

In [4]:
# Keep only the columns whose share of missing values is below 70%
# (a column sitting at exactly 70% is dropped as well).
dense_cols = player_df.columns[missing < 70]
full_df = player_df[dense_cols]

# Row filter: players with more than 1000 minutes overall, excluding the
# 2020/2021, 2021/2022 and 2022/2023 seasons.
played_enough = full_df['minutes_played_overall'] > 1000
excluded_season = full_df['season'].isin(['2020/2021', '2021/2022', '2022/2023'])

# Take an explicit copy: later cells add and overwrite columns on final_df,
# and assigning into a boolean-sliced frame is the chained-assignment pattern
# that pandas flags with SettingWithCopyWarning (silenced globally in this
# notebook). The copy makes final_df an independent frame.
final_df = full_df.loc[played_enough & ~excluded_season].copy()

# Sanity check: number of retained players per season.
final_df.groupby('season').full_name.count()

season
2010/2011    308
2011/2012    312
2012/2013    318
2013/2014    320
2014/2015    304
2015/2016    311
2016/2017    308
2017/2018    319
2018/2019    305
2019/2020    307
Name: full_name, dtype: int64

In [5]:
# Derive an approximate age as (first year of the season string) minus
# (birth year); months and days are ignored. The raw birthday columns and
# 'league' are then removed from the frame.
season_start_year = pd.to_datetime(final_df['season'].str.slice(0, 4)).dt.year
birth_year = pd.to_datetime(final_df['birthday_GMT']).dt.year

final_df = (
    final_df
    .assign(age=season_start_year - birth_year)
    .drop(columns=['birthday', 'birthday_GMT', 'league'])
)

In [6]:
# (rows, columns) of final_df at this point in the notebook.
final_df.shape

(3112, 44)

In [13]:
# Pairwise correlations (pandas' default method) between the numeric columns.
corr_df = final_df.select_dtypes(include=['number']).corr()

# Boolean mask of the strict upper triangle (k=1 skips the diagonal), so each
# feature pair is kept once and self-correlations are excluded.
mask = np.triu(np.ones(corr_df.shape, dtype=bool), k=1)
tri_df = corr_df.where(mask)

# Long format: one row per feature pair, sorted from highest to lowest.
corr_table = (
    tri_df.stack()
    .rename_axis(['feature1', 'feature2'])
    .rename('correlation')
    .reset_index()
    .sort_values('correlation', ascending=False)
)

# Pairs with correlation above 0.8. The comparison is on the signed value,
# so strongly negative pairs are not included.
hi_corr_table = corr_table.loc[corr_table['correlation'] > 0.8]

In [15]:
# Hand-picked columns to remove from final_df.
# NOTE(review): presumably chosen from the high-correlation pairs found
# earlier in the notebook - confirm against hi_corr_table.
cols_to_drop = [
    'minutes_played_overall',
    'appearances_overall',
    'goals_overall',
    'goals_per_90_home',
    'goals_per_90_away',
    'assists_overall',
    'rank_in_league_top_defenders',
    'rank_in_club_top_scorer',
    'rank_in_league_top_attackers',
    'rank_in_league_top_midfielders',
    'conceded_overall',
    'clean_sheets_overall',
    'cards_per_90_overall',
]
filtered_df = final_df.drop(columns=cols_to_drop)

Index(['full_name', 'age', 'season', 'position', 'Current Club',
       'minutes_played_home', 'minutes_played_away', 'nationality',
       'appearances_home', 'appearances_away', 'goals_home', 'goals_away',
       'assists_home', 'assists_away', 'penalty_goals', 'penalty_misses',
       'clean_sheets_home', 'clean_sheets_away', 'conceded_home',
       'conceded_away', 'yellow_cards_overall', 'red_cards_overall',
       'goals_involved_per_90_overall', 'assists_per_90_overall',
       'goals_per_90_overall', 'min_per_goal_overall',
       'conceded_per_90_overall', 'min_per_conceded_overall', 'min_per_match',
       'min_per_card_overall', 'min_per_assist_overall'],
      dtype='object')

In [3]:
# Team-of-the-year selections: one column per year key, eleven player names
# per column.
# NOTE(review): the keys look like the calendar year in which each season
# ended (e.g. 2011 for 2010/2011) - confirm before joining on 'season'.
# NOTE(review): 'Peter Čech' (2014) and 'Marcus Alonso' (2018) may be
# misspellings of 'Petr Čech' / 'Marcos Alonso'; verify against the dataset's
# full_name values before changing them, since the strings are presumably
# matched against that column.
toty_by_year = {
    2011: [
        'Carlos Alberto Tevez', 'Dimitar Ivanov Berbatov', 'Gareth Bale', 'Samir Nasri',
        'Jack Wilshere', 'Nani', 'Ashley Cole', 'Nemanja Vidić',
        'Vincent Kompany', 'Bacary Sagna', 'Edwin van der Sar',
    ],
    2012: [
        'Robin van Persie', 'Wayne Rooney', 'Gareth Bale', 'Yaya Touré',
        'Scott Parker', 'David Silva', 'Leighton Baines', 'Vincent Kompany',
        'Fabricio Coloccini', 'Kyle Walker', 'Joe Hart',
    ],
    2013: [
        'Robin van Persie', 'Luis Suárez', 'Gareth Bale', 'Juan Mata',
        'Michael Carrick', 'Eden Hazard', 'Leighton Baines', 'Jan Vertonghen',
        'Rio Ferdinand', 'Pablo Zabaleta', 'David de Gea',
    ],
    2014: [
        'Luis Suárez', 'Daniel Sturridge', 'Eden Hazard', 'Yaya Touré',
        'Steven Gerrard', 'Adam Lallana', 'Luke Shaw', 'Vincent Kompany',
        'Gary Cahill', 'Séamus Coleman', 'Peter Čech',
    ],
    2015: [
        'Diego Costa', 'Harry Kane', 'Alexis Sanchez', 'Nemanja Matić',
        'Philippe Coutinho', 'Eden Hazard', 'Ryan Bertrand', 'John Terry',
        'Gary Cahill', 'Branislav Ivanović', 'David de Gea',
    ],
    2016: [
        'Jamie Vardy', 'Harry Kane', 'Dimitri Payet', "N'Golo Kanté",
        'Dele Alli', 'Riyad Mahrez', 'Danny Rose', 'Toby Alderweireld',
        'Wes Morgan', 'Hector Bellerin', 'David de Gea',
    ],
    2017: [
        'Harry Kane', 'Romelu Lukaku', 'Sadio Mané', 'Dele Alli',
        "N'Golo Kanté", 'Eden Hazard', 'Danny Rose', 'David Luiz',
        'Gary Cahill', 'Kyle Walker', 'David de Gea',
    ],
    2018: [
        'Sergio Aguero', 'Harry Kane', 'Mohamed Salah', 'David Silva',
        'Kevin De Bruyne', 'Christian Eriksen', 'Marcus Alonso', 'Jan Vertonghen',
        'Nicolas Otamendi', 'Kyle Walker', 'David de Gea',
    ],
    2019: [
        'Sadio Mané', 'Sergio Aguero', 'Raheem Sterling', 'Paul Pogba',
        'Fernandinho', 'Bernardo Silva', 'Andrew Robertson', 'Virgil van Dijk',
        'Aymeric Laporte', 'Trent Alexander-Arnold', 'Ederson Moraes',
    ],
    2020: [
        'Sadio Mané', 'Jamie Vardy', 'Pierre-Emerick Aubameyang', 'David Silva',
        'Jordan Henderson', 'Kevin De Bruyne', 'Andrew Robertson', 'Virgil van Dijk',
        'Caglar Söyüncü', 'Trent Alexander-Arnold', 'Nick Pope',
    ],
}

toty_df = pd.DataFrame(data=toty_by_year)
toty_df

Unnamed: 0,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Carlos Alberto Tevez,Robin van Persie,Robin van Persie,Luis Suárez,Diego Costa,Jamie Vardy,Harry Kane,Sergio Aguero,Sadio Mané,Sadio Mané
1,Dimitar Ivanov Berbatov,Wayne Rooney,Luis Suárez,Daniel Sturridge,Harry Kane,Harry Kane,Romelu Lukaku,Harry Kane,Sergio Aguero,Jamie Vardy
2,Gareth Bale,Gareth Bale,Gareth Bale,Eden Hazard,Alexis Sanchez,Dimitri Payet,Sadio Mané,Mohamed Salah,Raheem Sterling,Pierre-Emerick Aubameyang
3,Samir Nasri,Yaya Touré,Juan Mata,Yaya Touré,Nemanja Matić,N'Golo Kanté,Dele Alli,David Silva,Paul Pogba,David Silva
4,Jack Wilshere,Scott Parker,Michael Carrick,Steven Gerrard,Philippe Coutinho,Dele Alli,N'Golo Kanté,Kevin De Bruyne,Fernandinho,Jordan Henderson
5,Nani,David Silva,Eden Hazard,Adam Lallana,Eden Hazard,Riyad Mahrez,Eden Hazard,Christian Eriksen,Bernardo Silva,Kevin De Bruyne
6,Ashley Cole,Leighton Baines,Leighton Baines,Luke Shaw,Ryan Bertrand,Danny Rose,Danny Rose,Marcus Alonso,Andrew Robertson,Andrew Robertson
7,Nemanja Vidić,Vincent Kompany,Jan Vertonghen,Vincent Kompany,John Terry,Toby Alderweireld,David Luiz,Jan Vertonghen,Virgil van Dijk,Virgil van Dijk
8,Vincent Kompany,Fabricio Coloccini,Rio Ferdinand,Gary Cahill,Gary Cahill,Wes Morgan,Gary Cahill,Nicolas Otamendi,Aymeric Laporte,Caglar Söyüncü
9,Bacary Sagna,Kyle Walker,Pablo Zabaleta,Séamus Coleman,Branislav Ivanović,Hector Bellerin,Kyle Walker,Kyle Walker,Trent Alexander-Arnold,Trent Alexander-Arnold
