In [None]:
import logging
logging.basicConfig(level=logging.INFO)
from utils import get_player_ids_for_entry
from constants import CURRENT_STATS_FEATURES,CREATED_AVG_FEATURES
from data_collection import DataContainer,DataProcessor
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from catboost import CatBoostRegressor
from sklearn.preprocessing import StandardScaler,RobustScaler
import numpy as np
import pandas as pd
from scipy.optimize import linprog
import joblib
# logging.basicConfig(level=logging.DEBUG)
# print("🎉 Auto-executed!")
#uncomment if you just want to see a snapshot of data
# current_stats_snap = DataGrabber.get_raw_season_stats_snap()
# players_dict,_ = extract_and_process_playerinfo_to_dict(current_stats_snap)
# players_df = process_dictdata_to_dataframe(players_dict,['id','now_cost','element_type','web_name','status']+['points_per_game','form','expected_goal_involvements','total_points','creativity','bonus','ict_index',])

In [None]:
import joblib

# Build the raw data container used for training and persist it, so the
# later cells can reload it from disk instead of collecting it again.
data_obj = DataContainer(
    range_weeks=9,
    lookback_weeks=10,
)
joblib.dump(data_obj, 'collected_data.pkl')

In [None]:
import joblib

# Reload the collected container and turn it into a processed training set.
data_obj = joblib.load('collected_data.pkl')

# NOTE(review): the variable and file are named "...1" but the processor is
# built with horizon=3 -- confirm which horizon is actually intended.
# processed_data3 = DataProcessor(data_obj,horizon=3)
processed_data1 = DataProcessor(
    data_obj,
    horizon=3,
)

# Persist the processed data for the training cell.
# joblib.dump(processed_data3, 'processed_data3.pkl')
joblib.dump(processed_data1, 'processed_data1.pkl')

In [None]:
import joblib
from constants import ID_FEATURES

# Load the processed training set written by the processing cell.
processed_data1 = joblib.load('processed_data1.pkl')
# processed_data3 = joblib.load('processed_data3.pkl')

# The horizon is whatever the DataProcessor was built with; the cell that
# writes processed_data1.pkl passes horizon=3 despite the "1" in the name.
X1, y1 = processed_data1.training_data
# Drop the identifier columns without mutating the frame held by
# processed_data1, so this cell stays idempotent on the in-memory object.
X1 = X1.drop(columns=ID_FEATURES)
# X3, y3 = processed_data3.training_data  # horizon=3
# X_all = pd.concat([X1, X3], ignore_index=True)
# y_all = pd.concat([y1, y3], ignore_index=True)

# Hold out 20% of the rows for validation (fixed seed for reproducibility).
X_train, X_val, Y_train, y_val = train_test_split(X1, y1, test_size=0.2, random_state=37)

# Scale features robustly, then fit a CatBoost regressor on the scaled data.
model = make_pipeline(RobustScaler(), CatBoostRegressor(iterations=1000, learning_rate=0.02, depth=6, verbose=100))
model.fit(X_train, Y_train)

# Validation metrics. RMSE is derived from MSE with np.sqrt because the
# `squared=` argument of mean_squared_error is deprecated and removed in
# recent scikit-learn releases.
preds = model.predict(X_val)
mse = mean_squared_error(y_val, preds)
print("MAE:", mean_absolute_error(y_val, preds))
print("MSE:", mse)
print("RMSE:", np.sqrt(mse))
print("R2 Score:", r2_score(y_val, preds))

# Save model to file
joblib.dump(model, 'fpl_model.pkl')

In [None]:
import shap

# Pull the two fitted steps out of the pipeline (make_pipeline names each
# step after its lower-cased class name).
scaler = model.named_steps['robustscaler']
model_only = model.named_steps['catboostregressor']

# The CatBoost step was fitted on the scaler's output, so the explainer must
# be given scaled rows; explaining raw X_val would attribute predictions for
# inputs the regressor never sees inside the pipeline.
X_val_scaled = pd.DataFrame(
    scaler.transform(X_val),
    columns=X_val.columns,
    index=X_val.index,
)

# Create explainer
explainer = shap.TreeExplainer(model_only)

# Get SHAP values for the rows exactly as the regressor receives them
shap_values = explainer.shap_values(X_val_scaled)

# Plot. The raw frame is passed only for feature names/colouring: the scaler
# is a per-feature affine rescale, so raw values keep the same ordering while
# staying in their original, readable units.
shap.summary_plot(shap_values, X_val)


In [None]:
# Display the pipeline's steps keyed by name; these keys are what other cells
# use to look up individual steps (e.g. named_steps['catboostregressor']).
model.named_steps

In [None]:
import joblib

# Collect a second, shorter-window container used only for prediction, and
# persist it so the next cell can reload it from disk.
predictor_data_obj = DataContainer(
    range_weeks=3,
    lookback_weeks=3,
)
joblib.dump(predictor_data_obj, 'predictor_data_obj.pkl')


In [None]:
import joblib

# Reload the prediction container and process it with next_week_pred enabled.
predictor_data_obj = joblib.load('predictor_data_obj.pkl')
predictor_processed_data = DataProcessor(
    predictor_data_obj,
    next_week_pred=True,
    horizon=3,
)

# Persist the processed prediction data for the team-selection cells.
joblib.dump(predictor_processed_data, 'predictor_processed_data.pkl')

In [None]:
""" Run this for best Wildcard team. No constraint on players per team"""
# Step 1: Predict future points for all players
import joblib
predictor_processed_data = joblib.load('predictor_processed_data.pkl')

# Ensure no NaN values in the features
predict_data = predictor_processed_data.training_data[0].fillna(0)
model = joblib.load('fpl_model.pkl')
predict_data['predicted_future_points'] = model.predict(predict_data.drop(ID_FEATURES,axis=1))
# Players without a fixture cannot score, so override the model for them.
predict_data.loc[predict_data['num_fixtures'] == 0, 'predicted_future_points'] = 0
# NOTE(review): this assignment aligns on the DataFrame index -- it assumes
# predict_data and data_obj.players share the same index; confirm.
predict_data[['id','element_type','web_name']] = predictor_processed_data.data_obj.players[['id','element_type','web_name']]

# Step 2: Set constraints

# Total budget, in millions (same unit as `costs` below)
budget = 100

# Exact number of players required per element_type
# (the original comment labels these 2 goalkeepers, 5 defenders, 5 midfielders, 3 forwards)
position_constraints = {
    1:2,
    2:5,
    3:5,
    4:3,
}

# Step 3: Build the optimisation problem

# now_cost is stored in tenths of a million; convert to millions
costs = predict_data['now_cost'].values / 10.0
future_points = predict_data['predicted_future_points'].values
positions = predict_data['element_type'].values

# Number of players to select
num_players = 15

# linprog minimises, so negate the points to maximise them
c = -future_points

# Inequality constraint: total squad cost must not exceed the budget.
# (Previously `budget` and `costs` were computed but never handed to the
# solver, so the "optimal" squad ignored price entirely.)
A_ub = costs.reshape(1, -1)
b_ub = np.array([budget], dtype=float)

# Equality constraints: exact head-count per position ...
A_eq = []
b_eq = []
for position, required_count in position_constraints.items():
    A_eq.append((positions == position).astype(int))
    b_eq.append(required_count)

# ... and exactly `num_players` picks overall
A_eq.append(np.ones(len(predict_data)))
b_eq.append(num_players)

# Step 4: Solve. `integrality=1` makes every decision variable an integer, and
# with bounds (0, 1) that means a strict pick / don't-pick choice; the plain LP
# relaxation could return fractional players that the 0.5 threshold below
# would then round into an invalid or over-budget squad.
result = linprog(
    c,
    A_ub=A_ub,
    b_ub=b_ub,
    A_eq=np.array(A_eq),
    b_eq=np.array(b_eq),
    bounds=[(0, 1)] * len(predict_data),
    integrality=np.ones(len(predict_data), dtype=int),
    method='highs',
)

# Step 5: Check for errors and get the selected players
if result.success:
    # Values are 0/1 up to solver tolerance, so 0.5 cleanly separates them
    selected_player_indices = np.where(result.x >= 0.5)[0]

    # Step 6: Get the player details, ordered by position. sort_values returns
    # a new frame, avoiding an in-place sort on a slice of predict_data.
    selected_players = predict_data.iloc[selected_player_indices].sort_values(by='element_type', ascending=True)
    print(selected_players[['web_name','element_type', 'now_cost','predicted_future_points']])

    # Total cost (in millions) and total predicted points of the squad
    total_cost = selected_players['now_cost'].sum() / 10
    total_points = selected_players['predicted_future_points'].sum()
    print(f"Total cost of selected players: {total_cost}")
    print(f"Total predicted points: {total_points}")
else:
    print("Optimization failed: ", result.message)
    # Debugging aids for an infeasible problem
    print(f"Total cost of all players: {predict_data['now_cost'].sum() / 10}")
    print(f"Number of players available: {len(predict_data)}")
    print(f"Positional constraints: {position_constraints}")


In [None]:
# current_team_ids = [383,350,88,255,366,328,402,99,321,401,58,521,71,270,295]
entry_id = 1729758#Me
# entry_id = 57510#Martin
# entry_id = 1724338#Kris
event_id = 34
transferred = False

# Use the variables above rather than repeating the literals, so switching
# entry_id / event_id actually changes which team is fetched.
current_team_ids = get_player_ids_for_entry(entry_id, event_id)
# if transferred:
#     current_team_ids.remove(best_transfer[0]['id'])
#     current_team_ids.append(best_transfer[1]['id'])

# Rows of the prediction table for the players in this team, ordered by
# position. sort_values returns a new frame, so the result must be kept;
# previously it was discarded and the displayed table was unsorted.
my_teams_predicted_points = (
    predict_data[predict_data['id'].isin(current_team_ids)]
    .sort_values(by=['element_type',], ascending=True)
)
my_teams_predicted_points[CURRENT_STATS_FEATURES+['predicted_future_points','web_name']]

In [None]:
from sklearn.tree import export_graphviz
from sklearn import tree
import matplotlib.pyplot as plt
import joblib

pipeline = joblib.load('fpl_model.pkl')
# pipeline.named_steps

# Look the forest up defensively: the training cell in this notebook saves a
# CatBoost pipeline, in which case there is no "randomforestregressor" step
# and a plain [] lookup raises KeyError. The result is bound to `forest`
# rather than `model`, so the global pipeline used by other cells is not
# replaced by a bare estimator.
forest = pipeline.named_steps.get("randomforestregressor")

if forest is None:
    print(
        "Saved pipeline has no 'randomforestregressor' step "
        f"(steps: {list(pipeline.named_steps)}); nothing to plot."
    )
else:
    # Select one tree from the forest
    estimator = forest.estimators_[0]  # You can loop or sample other indices

    # Plot using matplotlib (quick & easy way)
    # NOTE(review): feature_names must match the column order the forest was
    # trained on -- confirm this list matches the training frame.
    plt.figure(figsize=(20, 10))
    tree.plot_tree(estimator,
                   feature_names=CURRENT_STATS_FEATURES+CREATED_AVG_FEATURES,
                   filled=True,
                   max_depth=3,  # Limit for readability
                   fontsize=10)
    plt.show()


In [None]:
"""Run this if you want to know the best 1 to 1 transfer to make"""
from pprint import pprint

current_team_ids = [383,350,88,255,366,328,402,99,321,401,58,521,71,270,295]  # your 15-player team as player IDs
budget_left = 0.8        # money in the bank, in millions

# now_cost is stored in tenths of a million (the wildcard cell divides it by
# 10 to get millions), so convert the bank to the same integer unit before
# comparing. Comparing tenths against millions made any price rise of 0.1m or
# more look unaffordable. The epsilon guards against float error in the
# multiplication.
budget_left_tenths = int(np.floor(budget_left * 10 + 1e-9))

# Full player pool with predicted future points
all_players = predict_data.copy()
owned_mask = all_players['id'].isin(current_team_ids)
# Current team details, and everyone not already owned
current_team = all_players[owned_mask]
transfer_pool = all_players[~owned_mask]

# best_transfer is (outgoing player row, incoming player row) or None when no
# affordable swap improves the prediction. best_gain is the predicted points
# gained by that swap; starting at 0 means a swap that lowers the team's
# predicted points is never reported as "best".
best_transfer = None
best_gain = 0

for _, old_player in current_team.iterrows():
    # Affordable, same-position replacements for this player
    candidates = transfer_pool[
        (transfer_pool['element_type'] == old_player['element_type']) &
        (transfer_pool['now_cost'] - old_player['now_cost'] <= budget_left_tenths)
    ]
    if candidates.empty:
        continue

    # Only the outgoing and incoming players change the team total, so the
    # best replacement is simply the candidate with the highest prediction;
    # there is no need to rebuild the whole team for every candidate.
    candidate_points = candidates['predicted_future_points'].to_numpy(dtype=float)
    new_player = candidates.iloc[int(np.argmax(candidate_points))]

    gain = new_player['predicted_future_points'] - old_player['predicted_future_points']
    if gain > best_gain:
        best_gain = gain
        best_transfer = (old_player, new_player)

pprint(best_transfer)