In [1]:
from data_collection import DataGrabber,DataProcessor,extract_and_process_playerinfo_to_dict
import logging
# Use level=logging.DEBUG for verbose output.
# force=True replaces any handlers already attached to the root logger; without
# it basicConfig is a no-op once logging has been configured, so re-running this
# cell with a different level would silently change nothing.
logging.basicConfig(level=logging.INFO, force=True)

In [2]:
# Build the data source object. Per the log output below, construction itself
# queries the API (season stats snapshot and fixture data) and then fetches
# per-player data, so this cell can take a while to run.
data_obj = DataGrabber()

INFO:root:Starting method: get_raw_season_stats_snap
INFO:root:Starting method: query_API
INFO:root:Method query_API completed in 0.20 seconds
INFO:root:Method get_raw_season_stats_snap completed in 0.20 seconds
INFO:root:Starting method: get_raw_fixture_data
INFO:root:Starting method: query_API
INFO:root:Method query_API completed in 0.16 seconds
INFO:root:Method get_raw_fixture_data completed in 0.16 seconds
INFO:root:Starting method: get_gameweek_to_datestr_mapping
INFO:root:Method get_gameweek_to_datestr_mapping completed in 0.00 seconds
INFO:root:Starting method: get_player_data
INFO:root:Extracting player data from dictionary of length: 552...
INFO:root:Number of players: 552
INFO:root:New player, caching for: G.Jesus
INFO:root:Finished fetching gameweek data for 0 out of 552 player dictionaries...
INFO:root:New player, caching for: Gabriel
INFO:root:New player, caching for: Havertz
INFO:root:New player, caching for: J.Timber
INFO:root:New player, caching for: Jorginho
INFO:root:

In [None]:
# Wrap the raw data in a DataProcessor restricted to the listed features.
# NOTE(review): the prediction cells index processed_data.features[1:], which
# presumably skips the first entry ('points_per_game') -- confirm against
# DataProcessor before reordering this list.
processed_data = DataProcessor(
    data_obj,
    features=[
        'points_per_game',
        'form',
        'expected_goal_involvements',
        'total_points',
        'creativity',
        'bonus',
        'ict_index',
    ],
)

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# One seed shared by the split and the forest, so re-running the cell gives the
# same model and the same reported MAE.
RANDOM_STATE = 37

# Hold out 20% of the training data for validation. X_train / Y_train are
# rebound to the remaining 80% split.
X_train, Y_train = processed_data.training_data
X_train, X_val, Y_train, y_val = train_test_split(
    X_train, Y_train, test_size=0.2, random_state=RANDOM_STATE
)

# Scale the features, then fit a 100-tree random forest regressor.
model = make_pipeline(
    StandardScaler(),
    RandomForestRegressor(n_estimators=100, random_state=RANDOM_STATE),
)
model.fit(X_train, Y_train)

# Report mean absolute error on the held-out split.
preds = model.predict(X_val)
print("MAE:", mean_absolute_error(y_val, preds))

In [None]:
""" Run this for best Wildcard team. No constraint on players per team"""

import numpy as np
import pandas as pd
from scipy.optimize import linprog

# Step 1: Predict Future Points for All Players
# Relies on `data_obj`, `processed_data` and the fitted `model` from the earlier cells.
current_stats_snap = data_obj.get_raw_season_stats_snap()
players_df = data_obj.get_player_info(current_stats_snap, cache=False)
players_df["team"] = players_df["id"].map(lambda player_id: data_obj.players[player_id]["team"])

# Look up next gameweek's fixture info once, instead of once per player row and
# per column.
# NOTE(review): assumes get_team_fixture_info returns the same mapping on every
# call with the same arguments -- confirm.
next_gameweek = data_obj.current_gameweek + 1
team_fixture_info = processed_data.get_team_fixture_info(next_gameweek, next_gameweek)
players_df["num_fixtures"] = players_df["team"].map(lambda team: team_fixture_info[team]["num_fixtures"])
players_df["total_fdr"] = players_df["team"].map(lambda team: team_fixture_info[team]["total_fdr"])

feature_columns = processed_data.features[1:] + ["num_fixtures", "total_fdr"]

# Ensure no NaN values in the features
predict_data = players_df[feature_columns].fillna(0)

players_df['predicted_future_points'] = model.predict(predict_data)

# Step 2: Set Constraints

# Total budget in millions (same unit as `costs` below)
budget = 100

# Exact squad composition keyed by element_type
# (2 goalkeepers, 5 defenders, 5 midfielders, 3 forwards)
position_constraints = {
    1: 2,
    2: 5,
    3: 5,
    4: 3,
}

# Number of players to select (e.g., 15 players for an FPL team)
num_players = 15

# Step 3: Optimize the Team Selection

# now_cost is stored in units of 0.1 million; convert to millions.
costs = players_df['now_cost'].to_numpy() / 10.0
future_points = players_df['predicted_future_points'].to_numpy()
positions = players_df['element_type'].to_numpy()

# linprog minimizes, so negate the points to maximize them.
c = -future_points

# Inequality constraint: total cost must not exceed the budget.
A_ub = costs.reshape(1, -1)
b_ub = [budget]

# Equality constraints: exact head-count per position...
A_eq = [(positions == position).astype(int) for position in position_constraints]
b_eq = list(position_constraints.values())

# ...and exactly `num_players` players overall. This also excludes any player
# whose element_type is not listed in position_constraints.
A_eq.append(np.ones(len(players_df)))
b_eq.append(num_players)

# Step 4: Solve the optimization problem using linprog
# integrality=1 makes every decision variable an integer, which together with
# the (0, 1) bounds makes each pick a 0/1 choice instead of a fractional share.
# The `integrality` argument requires SciPy >= 1.9 and method='highs'.
result = linprog(
    c,
    A_ub=A_ub,
    b_ub=b_ub,
    A_eq=np.array(A_eq),
    b_eq=np.array(b_eq),
    bounds=[(0, 1)] * len(players_df),
    integrality=np.ones(len(players_df)),
    method='highs',
)

# Step 5: Check for errors and Get the selected players
if result.success:
    # Values are 0/1 up to solver tolerance; >= 0.5 picks the selected players.
    selected_player_indices = np.flatnonzero(result.x >= 0.5)

    # Step 6: Get the player IDs and their details
    selected_players = players_df.iloc[selected_player_indices]
    print(selected_players[['web_name', 'now_cost', 'element_type', 'predicted_future_points']])

    # Total cost (converted from 0.1 million units) and total predicted points
    total_cost = selected_players['now_cost'].sum() / 10
    total_points = selected_players['predicted_future_points'].sum()
    print(f"Total cost of selected players: {total_cost}")
    print(f"Total predicted points: {total_points}")
else:
    print("Optimization failed: ", result.message)
    # Debugging: Check if total cost exceeds budget or if other issues exist
    print(f"Total cost of all players: {players_df['now_cost'].sum() / 10}")
    print(f"Number of players available: {len(players_df)}")
    print(f"Positional constraints: {position_constraints}")


In [None]:
"""Run this if you want to know the best 1 to 1 transfer to make"""
# Requires `players_df` (with num_fixtures / total_fdr columns) from the
# wildcard cell, plus `model` and `processed_data` from the earlier cells.
current_team_ids = [383,350,88,255,366,328,402,99,321,401,58,521,71,270,295]  # your 15-player team as player IDs
budget_left = 0.8        # money in the bank, in millions

# now_cost is stored in units of 0.1 million, so compare costs in that unit.
# Rounding keeps the comparison in whole tenths and avoids float edge cases.
budget_left_tenths = round(budget_left * 10)

# Get your full player pool with predicted future points.
# NaNs are filled with 0, matching how the wildcard cell prepares its features.
feature_columns = processed_data.features[1:] + ["num_fixtures", "total_fdr"]
all_players = players_df.copy()
all_players['predicted'] = model.predict(all_players[feature_columns].fillna(0))

# Split the pool into players you own and players you could bring in
owned_mask = all_players['id'].isin(current_team_ids)
current_team = all_players[owned_mask]
available_players = all_players[~owned_mask]

best_transfer = None
best_gain = 0  # only transfers that strictly increase predicted points qualify

# Swapping one player changes the team total by exactly (new - old), so for each
# owned player it is enough to find the best affordable same-position replacement.
# NOTE(review): the per-club player limit is not checked here.
for _, old_player in current_team.iterrows():
    affordable = available_players[
        (available_players['element_type'] == old_player['element_type']) &
        (available_players['now_cost'] - old_player['now_cost'] <= budget_left_tenths)
    ]
    if affordable.empty:
        continue

    # Positional argmax so a non-unique index cannot return several rows.
    new_player = affordable.iloc[affordable['predicted'].to_numpy().argmax()]
    gain = new_player['predicted'] - old_player['predicted']

    # Track the best one
    if gain > best_gain:
        best_gain = gain
        best_transfer = (old_player, new_player)

# (player_out, player_in) as row Series, or None when no transfer improves the team
best_transfer