# FPL Prices Prediction Model

## Data Preprocessing

In [159]:
import numpy as np
import pandas as pd
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import Ridge

'season_x', 'name', 'position', 'team_x', 'assists', 'bonus', 'bps',
       'clean_sheets', 'creativity', 'element', 'fixture', 'goals_conceded',
       'goals_scored', 'ict_index', 'influence', 'kickoff_time', 'minutes',
       'opponent_team', 'opp_team_name', 'own_goals', 'penalties_missed',
       'penalties_saved', 'red_cards', 'round', 'saves', 'selected',
       'team_a_score', 'team_h_score', 'threat', 'total_points',
       'transfers_balance', 'transfers_in', 'transfers_out', 'value',
       'was_home', 'yellow_cards', 'GW'

In [160]:
# Columns kept from the merged seasons CSV; everything else in the file is unused
# by this notebook.
common_features = [
    'season_x', 'name', 'position', 'assists',
    'clean_sheets', 'creativity', 'goals_conceded',
    'goals_scored', 'ict_index', 'influence', 'minutes',
    'own_goals',
    'penalties_saved', 'red_cards', 'round', 'saves', 'selected',
    'threat', 'total_points',
    'value',
    'yellow_cards',
]

# The plain read_csv call emitted a warning (see the original cell output) --
# presumably a mixed-dtype DtypeWarning; TODO confirm. Parsing only the needed
# columns and letting pandas infer dtypes from the whole file avoids chunk-wise
# dtype guessing and skips the unused columns.
cleaned_players = pd.read_csv(
    "./clean_data/cleaned_merged_seasons.csv",
    usecols=common_features,
    low_memory=False,
)
# `usecols` keeps the file's column order; re-select to get the list's order.
cleaned_players = cleaned_players[common_features]

  cleaned_players = pd.read_csv("./clean_data/cleaned_merged_seasons.csv")


In [161]:
# Per-position column selections taken from `cleaned_players`.
# Each list begins with the three identifying columns
# ('season_x', 'name', 'position') followed by that position's stat columns.

gk_features = [
    'season_x', 'name', 'position',
    'clean_sheets', 'goals_conceded', 'influence', 'minutes', 'own_goals',
    'penalties_saved', 'round', 'saves', 'selected', 'total_points', 'value',
]

defender_features = [
    'season_x', 'name', 'position',
    'assists', 'clean_sheets', 'ict_index', 'goals_conceded', 'goals_scored',
    'influence', 'minutes', 'own_goals', 'red_cards', 'round', 'saves',
    'selected', 'threat', 'total_points', 'value', 'yellow_cards',
]

mid_features = [
    'season_x', 'name', 'position',
    'assists', 'creativity', 'ict_index', 'goals_scored', 'influence',
    'minutes', 'red_cards', 'round', 'selected', 'threat', 'total_points',
    'value', 'yellow_cards',
]

fwd_features = [
    'season_x', 'name', 'position',
    'assists', 'creativity', 'ict_index', 'goals_scored', 'influence',
    'minutes', 'round', 'selected', 'threat', 'total_points', 'value',
]

In [162]:
# One frame per position, restricted to that position's columns.
# `.loc[rows, cols]` does the row filter and column selection in a single step,
# and `.copy()` gives each frame its own data: the frames are modified in place
# further down (new columns, sort, drop), which on a chained-indexing slice of
# `cleaned_players` raises pandas' SettingWithCopyWarning.
gks = cleaned_players.loc[cleaned_players['position'] == 'GK', gk_features].copy()
defenders = cleaned_players.loc[cleaned_players['position'] == 'DEF', defender_features].copy()
mids = cleaned_players.loc[cleaned_players['position'] == 'MID', mid_features].copy()
fwds = cleaned_players.loc[cleaned_players['position'] == 'FWD', fwd_features].copy()

# Order matters: later cells pair these frames positionally with other lists.
ds_list = [gks, defenders, mids, fwds]

In [163]:
# Prepare every per-position frame in place: derive integer keys from the text
# columns, order the rows, then drop the text columns.
for ds in ds_list:
    # Integer code per distinct player name; codes are assigned within this
    # frame only, so the same id in two frames need not be the same player.
    ds['player_id'] = pd.Categorical(ds['name']).codes
    # 'season_x' is split on '-' and the second piece is stored as an int.
    ds['season'] = ds['season_x'].map(lambda label: int(label.split('-')[1]))
    ds.sort_values(by=['season', 'player_id', 'round'], inplace=True)
    ds.drop(columns=['name', 'season_x', 'position'], inplace=True)

In [164]:
def update_ds(df, n):
    """Add an n-row-ahead price target and turn each stat into an n-row forward sum.

    All operations are done per ``(season, player_id)`` group and rely on the
    rows of each group already being in playing order.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'season', 'player_id' and 'value'. Every column other
        than 'round', 'player_id', 'season', 'value' and the two target
        columns is treated as a summable stat. Modified in place.
    n : int
        Horizon in rows (games); must be >= 1.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with:

        * 'next_game_value' -- 'value' taken n rows later in the same group
          (NaN when the group has no such row);
        * 'value_change' -- ``next_game_value - value``;
        * every stat column replaced by the sum of the current row and the
          following ``n - 1`` rows of the group (fewer rows near the group's
          end, where the target is NaN anyway).

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")

    group_keys = ['season', 'player_id']
    # The target columns are excluded too, so calling the function again on an
    # already-processed frame does not roll them as if they were stats.
    excluded = {'round', 'player_id', 'season', 'value',
                'next_game_value', 'value_change'}
    stats_columns = [col for col in df.columns if col not in excluded]

    df['next_game_value'] = df.groupby(group_keys)['value'].shift(-n)
    df['value_change'] = df['next_game_value'] - df['value']

    def forward_sum(series):
        # pandas rolling windows look backwards. Rolling over the reversed
        # series and reversing back yields a forward-looking window, which is
        # also correct on the first n - 1 rows of a group (shifting first and
        # rolling afterwards loses the earliest rows of those windows).
        return series[::-1].rolling(window=n, min_periods=1).sum()[::-1].to_numpy()

    for col in stats_columns:
        # `transform` always returns a result aligned to df's own index,
        # independent of how a given pandas version indexes `apply` results.
        df[col] = df.groupby(group_keys)[col].transform(forward_sum)
    return df

In [165]:
# Build the targets and stat sums with a horizon of one row per player-season.
# update_ds modifies the frame it is given, so the frames held in ds_list are
# updated directly and the returned reference does not need to be stored.
for ds in ds_list:
    update_ds(ds, 1)

In [166]:
# Model input columns per position. These rebind the earlier selection lists,
# now without the identifying columns.
# 'value' is deliberately left out of every list (it was commented out in the
# original cell) -- presumably to keep the current price out of the inputs;
# TODO confirm.

gk_features = [
    'clean_sheets', 'goals_conceded', 'influence', 'minutes', 'own_goals',
    'penalties_saved', 'round', 'saves', 'selected', 'total_points',
]

defender_features = [
    'assists', 'clean_sheets', 'ict_index', 'goals_conceded', 'goals_scored',
    'influence', 'minutes', 'own_goals', 'red_cards', 'round', 'saves',
    'selected', 'threat', 'total_points', 'yellow_cards',
]

mid_features = [
    'assists', 'creativity', 'ict_index', 'goals_scored', 'influence',
    'minutes', 'red_cards', 'round', 'selected', 'threat', 'total_points',
    'yellow_cards',
]

fwd_features = [
    'assists', 'creativity', 'ict_index', 'goals_scored', 'influence',
    'minutes', 'round', 'selected', 'threat', 'total_points',
]

In [168]:
# Fit one Ridge regression per position on a random 75/25 split and print the
# mean absolute error of the predicted 'value_change' on the held-out quarter.
pos_list = ['gk', 'def', 'mid', 'fwd']
ds_features = [gk_features, defender_features, mid_features, fwd_features]

for i, ds in enumerate(ds_list):
    # Remove (in place) every row that has a missing value in any column.
    ds.dropna(inplace=True)

    X = ds[ds_features[i]]
    y = ds["value_change"]
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.25,
        random_state=42,  # fixed seed so the split is repeatable
    )

    model = Ridge()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(pos_list[i], " absolute error: ", mean_absolute_error(y_test, y_pred))

gk  absolute error:  0.081826114424121
def  absolute error:  0.1365800414430101
mid  absolute error:  0.14748202227653653
fwd  absolute error:  0.1991280779519458
