In [21]:
import pandas as pd
import joblib
import os

In [50]:
def predict_single_player(df_player, model, window, target_cols):
    g = df_player.sort_values("season")
    if len(g) <= window:
        return None

    past = g.tail(window).copy()

    flat = {}
    features_exclude = ['player','team','season','league','pos']

    for j, (_, r) in enumerate(past.iterrows(), start=1):
        suffix = f"_t-{window-j+1}"
        for c in r.index:
            if c in features_exclude:
                continue
            flat[c + suffix] = r[c]

    last_row = past.iloc[-1]
    flat["player"] = last_row["player"]
    flat["season_target"] = int(last_row["season"]) + 1
    flat["age_at_target"] = last_row["age"] + 1
    flat["team_last"] = last_row["team"]
    flat["league_last"] = last_row["league"]

    X = pd.DataFrame([flat])
    pred = model.predict(X)[0]

    return dict(zip(target_cols, pred))

In [71]:
df = pd.read_csv("../data/processed_whole/processed_data.csv")

TARGET_SEASONS = ['Standard_Sh/90','Standard_SoT/90','Standard_SoT%', 'Standard_G/Sh',
       'Standard_G/SoT', 'Per 90 Minutes_Gls', 'Per 90 Minutes_Ast',
       'Per 90 Minutes_G+A', 'Expected_G-xG', 'Expected_A-xAG',
       'Per 90 Minutes_xG', 'Per 90 Minutes_xAG', 'Per 90 Minutes_xG+xAG',
       'Progression_PrgC', 'Progression_PrgP', 'Progression_PrgR', 'PrgP',
       'Carries_PrgC', 'Short_Cmp%', 'Medium_Cmp%', 'Long_Cmp%', '1/3', 'PPA',
       'CrsPA', 'SCA_SCA90', 'GCA_GCA90', 'Tackles_TklW', 'Challenges_Tkl%',
       'Int', 'Blocks_Blocks', 'Performance_Recov', 'Take-Ons_Att',
       'Take-Ons_Succ%', 'Carries_Mis', 'Receiving_Rec', 'Receiving_PrgR',
       'Aerial Duels_Won%']


df['season_code'] = df['season'].astype(str).str.zfill(4)

df['season'] = df['season_code'].apply(
    lambda x: int("19" + x[:2]) if int(x[:2]) > 40 else int("20" + x[:2]))

df['season'] = df['season'].astype("int") 

model_cache = {}

for w in range(3, 11):
    path = f"../Model_1/models/model_window{w}.pkl"
    if os.path.exists(path):
        model_cache[w] = joblib.load(path)

predictions = []

for player_name, df_player in df.groupby("player"):
    if df_player['season'].max() < 2024:
        continue

    df_player = df_player.sort_values("season")
    n_seasons = len(df_player)

    if n_seasons < 3:
        continue

    window = max(3, min(n_seasons - 1, 10)) 
    if window not in model_cache:
        continue
    
    model = model_cache[window]

    pred = predict_single_player(df_player, model, window, TARGET_SEASONS)
    if pred is None:
        continue

    pred["player"] = player_name
    pred["window_used"] = window
    pred["season_predicted"] = df_player["season"].max() + 1

    predictions.append(pred)

In [97]:
pred_df = pd.DataFrame(predictions)

In [98]:
cols = ['player', 'season_predicted', 'window_used'] + [c for c in pred_df.columns if c not in ["player", "window_used", "season_predicted"]]
pred_df = pred_df[cols]

In [84]:
df = pd.read_csv("../data/processed_whole/processed_data.csv")

In [99]:
pred_df.sort_values("Per 90 Minutes_Gls", ascending=False)

Unnamed: 0,player,season_predicted,window_used,Standard_Sh/90,Standard_SoT/90,Standard_SoT%,Standard_G/Sh,Standard_G/SoT,Per 90 Minutes_Gls,Per 90 Minutes_Ast,...,Challenges_Tkl%,Int,Blocks_Blocks,Performance_Recov,Take-Ons_Att,Take-Ons_Succ%,Carries_Mis,Receiving_Rec,Receiving_PrgR,Aerial Duels_Won%
417,Kylian Mbappé,2025,8,4.305820,1.937660,48.825756,0.188751,0.406855,0.877843,0.339475,...,45.575169,8.441730,14.906480,74.666756,58.821503,56.333824,62.572720,953.723267,285.472717,42.400043
277,Harry Kane,2025,10,3.646687,1.602043,43.791031,0.229903,0.386139,0.819879,0.191169,...,28.597664,4.588289,9.284013,53.824532,47.591579,51.180176,40.875923,700.165222,135.452942,44.518337
539,Mohamed Salah,2025,10,3.006994,1.384216,41.601971,0.213694,0.459609,0.751739,0.292225,...,23.979538,7.410297,17.836102,84.932434,64.669289,55.291058,60.399879,1445.976929,212.269180,39.969635
221,Erling Haaland,2025,5,3.095505,1.630184,46.579777,0.187236,0.467233,0.745277,0.149409,...,32.113998,8.917013,14.956854,54.125462,44.131447,53.292942,56.215256,624.869507,160.489105,46.441216
197,Dušan Vlahović,2025,5,2.981242,1.344676,43.954628,0.189729,0.400850,0.681134,0.176847,...,20.179411,4.577690,8.562578,42.774212,31.694641,45.955669,42.888779,446.125336,118.782661,45.479759
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
276,Harrison Reed,2025,8,0.942336,0.946927,47.190163,0.062266,0.179836,0.014909,0.053172,...,38.736031,18.454418,15.705069,77.652840,8.295171,49.562225,10.702530,280.073090,24.931524,43.485786
548,Moussa Niakhate,2025,7,0.734430,0.553109,40.305679,0.089972,0.298366,0.012993,0.060606,...,56.496197,30.156044,27.074888,117.531860,13.456281,70.785370,11.273578,1198.723022,32.738331,59.758766
395,Kelvin Amian,2025,5,0.507238,0.334242,38.092457,0.105329,0.314943,0.012876,0.080307,...,56.178200,26.553751,22.694515,132.414276,21.664583,52.886692,20.829477,861.111023,96.600548,55.834576
92,Ben Davies,2025,10,0.874276,0.524376,44.545650,0.060908,0.136054,0.007096,0.036000,...,60.635365,21.740837,21.193996,94.644127,9.113230,73.830055,7.681743,641.484253,33.419834,61.212132
