In [None]:
import requests
import pandas as pd

In [None]:
## Defining the API URLs and fetching the data
bootstrap_url = "https://fantasy.premierleague.com/api/bootstrap-static/"
fixtures_url = "https://fantasy.premierleague.com/api/fixtures/"
pd.set_option('display.max_columns', None)  # never truncate columns when a frame is displayed

# Without a timeout requests waits indefinitely, which would hang the notebook
# if the API stops responding.
REQUEST_TIMEOUT_SECONDS = 30

bootstrap_response = requests.get(bootstrap_url, timeout=REQUEST_TIMEOUT_SECONDS)
# Fail loudly on an HTTP error instead of trying to parse an error body as JSON.
bootstrap_response.raise_for_status()
boot_data = bootstrap_response.json()

fixtures_response = requests.get(fixtures_url, timeout=REQUEST_TIMEOUT_SECONDS)
fixtures_response.raise_for_status()
fixtures_data = fixtures_response.json()

In [None]:
# teams ---> A complete summary of the teams metrics
# elements ---> A complete summary of the players metrics
# element_stats ---> Has the metrics we are interested in for each player
# element_types ---> Has the player positions (Goalkeeper, Defender, Midfielder, Forward)

In [None]:
# Render the 'element_stats' entries of the bootstrap payload as a table
display(pd.DataFrame(boot_data['element_stats']))

In [None]:
# Render the 'element_types' entries of the bootstrap payload as a table
display(pd.DataFrame(boot_data['element_types']))

In [None]:
# Load the records under the 'elements' key into a DataFrame and preview the first rows
player_records = boot_data['elements']
player_data = pd.DataFrame(player_records)
player_data.head()

In [None]:
# Keep only active players (status 'a') with a 100% chance of playing the next round;
# a missing chance_of_playing_next_round value is treated as 100.
# Possible later tweak: also admit the 75% players. That widens the pool and would let
# users choose between a safe bet, a riskier pick, or a mix of both.
is_active = player_data['status'] == 'a'
next_round_chance = player_data['chance_of_playing_next_round'].fillna(100)
active_player_data = player_data[is_active & (next_round_chance == 100)].copy()

In [None]:
# Convert the four metric columns to numeric dtype so they can be used in arithmetic
cols_to_fix = ['influence', 'creativity', 'threat', 'ict_index']
active_player_data = active_player_data.assign(
    **{metric: pd.to_numeric(active_player_data[metric]) for metric in cols_to_fix}
)



def calculate_custom_ict(row):
    """Return a position-specific ICT value for one player row.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'element_type', 'influence', 'creativity', 'threat'
        and 'ict_index'.

    Returns
    -------
    number
        * element_type 2 (Defender): 0.6 * creativity + 0.4 * influence
        * element_type 4 (Forward):  0.8 * threat + 0.2 * influence
        * any other element_type:    the row's existing 'ict_index', unchanged

    NOTE(review): the two blended branches are weighted averages of the raw
    component columns, while the default branch passes 'ict_index' through
    untouched - confirm both are on a comparable scale before ranking
    players across positions.
    """
    # element_type -> ((first column, weight), (second column, weight))
    blends = {
        2: (('creativity', 0.6), ('influence', 0.4)),  # Defender
        4: (('threat', 0.8), ('influence', 0.2)),      # Forward
    }

    recipe = blends.get(row['element_type'])
    if recipe is None:
        # Default for others
        return row['ict_index']

    (first_col, first_weight), (second_col, second_weight) = recipe
    return (row[first_col] * first_weight) + (row[second_col] * second_weight)

# Overwrite ict_index with the position-specific value, computed row by row
active_player_data['ict_index'] = active_player_data.apply(calculate_custom_ict, axis=1)

# Require a minimum number of minutes before computing per-90 rates: this rules out
# division by zero and stops bench-warmers with 5 mins from having skewed stats.
min_minutes_threshold = 270 # 3 full games sample size
has_enough_minutes = active_player_data['minutes'] >= min_minutes_threshold
active_player_data = active_player_data[has_enough_minutes].copy()

# ICT produced per 90 minutes played
active_player_data['ict_per90'] = (active_player_data['ict_index'] / active_player_data['minutes']) * 90

# Min-max scale ict_per90 to a 0-100 range over the players that remain
per90_low = active_player_data['ict_per90'].min()
per90_high = active_player_data['ict_per90'].max()
active_player_data['norm_ict_per90'] = (active_player_data['ict_per90'] - per90_low) / (per90_high - per90_low) * 100
active_player_data

In [None]:
# Slim the active-player frame down to a fixed set of identity/price columns plus
# every column named in the 'element_stats' section of the bootstrap payload
player_df_cols = [stat['name'] for stat in boot_data['element_stats']]
base_cols = ['id','web_name', 'team', 'element_type', 'now_cost', 'selected_by_percent', 'total_points', 'norm_ict_per90']
players_df = active_player_data[base_cols + player_df_cols].copy()
players_df.head()

In [None]:
# Teams data from the 'teams' key of the bootstrap payload, loaded into a DataFrame
team_data = pd.DataFrame(boot_data['teams'])

In [None]:
# Mapping team IDs to team names and position IDs to position names in the
# de-cluttered players dataframe
team_map = {team['id']: team['name'] for team in boot_data['teams']}
# .fillna(<current values>) keeps this cell safe to re-run: on a second run the column
# already holds names, .map() finds no matching id and returns NaN, and the existing
# names are kept instead of being wiped out. (An id missing from the map is likewise
# left as-is rather than becoming NaN.)
players_df['team'] = players_df['team'].map(team_map).fillna(players_df['team'])

positions_map = {pos['id']: pos['singular_name'] for pos in boot_data['element_types']}
players_df['element_type'] = players_df['element_type'].map(positions_map).fillna(players_df['element_type'])
players_df.head()

In [None]:
# Print the full per-column summary of the players dataframe, including non-null counts
players_df.info(verbose=True, show_counts=True)

In [None]:
# Diving into fixtures: load the fixtures payload into a DataFrame and preview it
fixtures_df = pd.DataFrame(fixtures_data)
fixtures_df.head()

In [None]:
# Keep only the fixture columns used later and swap the team ids for team names
fixture_cols = ['event', 'team_h', 'team_a', 'team_h_score', 'team_a_score', 'team_h_difficulty', 'team_a_difficulty', 'finished']
cleaned_fixtures_df = fixtures_df[fixture_cols].copy()
for side in ('team_h', 'team_a'):
    cleaned_fixtures_df[side] = cleaned_fixtures_df[side].map(team_map)
cleaned_fixtures_df.head()

In [None]:
# Split the fixtures on the 'finished' flag: played matches vs. matches still to come
is_finished = cleaned_fixtures_df['finished'].eq(True)
is_unfinished = cleaned_fixtures_df['finished'].eq(False)
historical_fixtures = cleaned_fixtures_df[is_finished].copy()
future_fixtures = cleaned_fixtures_df[is_unfinished].copy()

In [None]:
# Build one record per team per finished fixture describing what that team CONCEDED.
TEAM_STAT_COLUMNS = ['team_id', 'goals_conceded', 'gameweek', 'bps_conceded']
team_stats = []

for fixture in fixtures_data:
    if not fixture['finished']:
        continue

    # The 'bps' entry of the fixture stats holds a list of values for the home ('h')
    # side and one for the away ('a') side; sum each list (0 when the entry is absent).
    bps_data = next((item for item in fixture['stats'] if item['identifier'] == 'bps'), None)
    home_bps = sum(p['value'] for p in bps_data['h']) if bps_data else 0
    away_bps = sum(p['value'] for p in bps_data['a']) if bps_data else 0

    # "Conceded" means what the OPPONENT produced: the home team concedes the away
    # side's goals and BPS, and vice versa. (Previously each team was credited with
    # its own BPS, which contradicted how goals_conceded is filled in.)
    team_stats.append({
        'team_id': fixture['team_h'],
        'goals_conceded': fixture['team_a_score'],
        'gameweek': fixture['event'],
        'bps_conceded': away_bps
    })
    team_stats.append({
        'team_id': fixture['team_a'],
        'goals_conceded': fixture['team_h_score'],
        'gameweek': fixture['event'],
        'bps_conceded': home_bps
    })

# Passing columns= keeps the frame well-formed (and sortable) even when no fixture
# has finished yet and team_stats is empty.
team_perf = pd.DataFrame(team_stats, columns=TEAM_STAT_COLUMNS).sort_values(['team_id', 'gameweek'])
team_perf

In [None]:
# Calculate Rolling Stats (3-game window) per team; NaN until a team has 3 matches
team_perf['roll_goals'] = team_perf.groupby('team_id')['goals_conceded'].transform(lambda x: x.rolling(3).mean())
team_perf['roll_bps'] = team_perf.groupby('team_id')['bps_conceded'].transform(lambda x: x.rolling(3).mean())

# SHIFT by one match within each team so a row holds the form going INTO that match
# (i.e. it does not include the match's own result)
team_perf['opp_def_form'] = team_perf.groupby('team_id')['roll_goals'].shift(1)
team_perf['opp_bps_form'] = team_perf.groupby('team_id')['roll_bps'].shift(1)

# Multipliers vs. League Average (mean over every team-match record)
league_goals_avg = team_perf['goals_conceded'].mean()
league_bps_avg = team_perf['bps_conceded'].mean()

team_perf['mult_goals'] = team_perf['opp_def_form'] / league_goals_avg
team_perf['mult_bps'] = team_perf['opp_bps_form'] / league_bps_avg

# Fill NaN (early season) with 1.0 (neutral). Only the multipliers are filled: 1.0 is
# neutral for a ratio, but would be a made-up value in the raw rolling/form columns.
multiplier_cols = ['mult_goals', 'mult_bps']
team_perf[multiplier_cols] = team_perf[multiplier_cols].fillna(1.0)

# Swap team ids for names. The fillna keeps the cell re-runnable: once the column holds
# names, .map() returns NaN and the existing names are kept instead of being wiped out.
team_perf['team_id'] = team_perf['team_id'].map(team_map).fillna(team_perf['team_id'])
team_perf

In [None]:
# The next gameweek is the smallest event number among the unfinished fixtures
upcoming_events = future_fixtures['event']
next_gw = upcoming_events.min()

# Keep only the matches belonging to that gameweek
next_matches = future_fixtures[upcoming_events == next_gw].copy()

In [None]:
# Show the fixtures selected for the next gameweek
next_matches

In [None]:
# Slim player lookup holding only the attributes needed for the fixture join
player_map_cols = ['id', 'web_name', 'team', 'element_type', 'norm_ict_per90', 'now_cost']
player_map = players_df[player_map_cols].copy()

# Players whose team is the home side of a next-gameweek match; the opponent is the
# away side. Note 'opponent_id' takes whatever 'team_a' holds in next_matches.
home_players = player_map.merge(next_matches, left_on='team', right_on='team_h')
home_players['opponent_id'] = home_players['team_a']
display(home_players.head(10))
print(home_players.shape)

In [None]:
# Players whose team is the away side of a next-gameweek match; the opponent is the
# home side
away_players = player_map.merge(next_matches, left_on='team', right_on='team_a')
away_players['opponent_id'] = away_players['team_h']
display(away_players.head(10))
print(away_players.shape)

In [None]:
# Stack the home-side and away-side player rows into one frame.
# No ignore_index is passed, so each input keeps its own row labels and labels can repeat.
full_lineup = pd.concat([home_players, away_players])
full_lineup

In [None]:
# Opponent form going into the NEXT gameweek.
# The shifted 'mult_*' columns on a team's last row describe its form going into its
# most recent match, so taking tail(1) of them would leave that match's result out.
# Recompute the multipliers from each team's latest FORM_WINDOW finished matches instead.
FORM_WINDOW = 3  # same window length as the rolling stats
conceded_cols = ['goals_conceded', 'bps_conceded']
league_avg = team_perf[conceded_cols].mean()

# team_perf is ordered by team and gameweek, so tail() yields each team's latest matches
recent_matches = team_perf.groupby('team_id').tail(FORM_WINDOW)
recent_by_team = recent_matches.groupby('team_id')[conceded_cols]
# Teams without a full window yet get NaN here, which becomes the neutral
# multiplier 1.0 below.
recent_form = recent_by_team.mean().where(recent_by_team.count() >= FORM_WINDOW)

latest_team_form = (
    (recent_form / league_avg)
    .fillna(1.0)
    .rename(columns={'goals_conceded': 'mult_goals', 'bps_conceded': 'mult_bps'})
    .reset_index()
)
display(latest_team_form)

In FPL, goals are the single biggest event. A goal is worth 4, 5, or 6 points (depending on position). An assist is worth 3 points. Goals are the primary driver for clean sheets (for the other team) and massive scorelines. If a team has high mult_goals, the probability of an attacker getting a return is statistically much higher. 

Underlying Dominance metric (mult_bps). Some teams are "Bonus Goldmines" because they concede high-quality chances, even if they don't concede 5 goals every game. By giving BPS a 30% weight, we are rewarding players who play against "passive" teams. These are teams that allow lots of shots on target, crosses, and passes—the "creative" stats that rack up bonus points.

In [None]:
# Attach the form multipliers of each player's next opponent (left join, so every
# player row is kept), then blend them into one opponent-vulnerability score
vulnerability_df = full_lineup.merge(latest_team_form, left_on='opponent_id', right_on='team_id', how='left')

# Weighted rule-based blend: 70% goals multiplier, 30% BPS multiplier
goals_part = vulnerability_df['mult_goals'] * 0.7
bps_part = vulnerability_df['mult_bps'] * 0.3
vulnerability_df['opp_score'] = goals_part + bps_part
vulnerability_df

In [None]:
# Derive the scoring columns on a fresh copy of the vulnerability frame
final_df = vulnerability_df.assign(
    # player's ability for the upcoming match: normalised ICT scaled by opponent weakness
    player_score=lambda df: df['norm_ict_per90'] * df['opp_score'],
    # adjust cost to its actual value in millions
    now_cost=lambda df: df['now_cost'] / 10,
    # value for money: points potential per million pounds
    vfm_score=lambda df: df['player_score'] / df['now_cost'],
)

# "I don't care about the price, give me the best players" view: top 15 by player_score
ranking_cols = ['event','id', 'web_name', 'team','element_type', 'now_cost', 'opponent_id', 'player_score', 'vfm_score']
final_df[ranking_cols].sort_values(by='player_score', ascending=False).head(15)

In [None]:
# "Budget friendly" view: the 13 players with the best value-for-money score, so
# money can be spent elsewhere
budget_view_cols = ['event','id', 'web_name', 'team','element_type', 'now_cost', 'opponent_id', 'player_score', 'vfm_score']
budget_view = final_df[budget_view_cols]
budget_view.sort_values(by='vfm_score', ascending=False).head(13)

In [None]:
import pulp

# Rules shared by both optimisations
BUDGET_MILLIONS = 100      # total budget cap, in the same unit as final_df['now_cost']
MAX_PLAYERS_PER_TEAM = 3   # at most this many picks from any one team


def _solve_squad(candidates, problem_name, squad_size, position_limits,
                 budget=BUDGET_MILLIONS, max_per_team=MAX_PLAYERS_PER_TEAM):
    """Select the squad with the highest total 'player_score' under the squad rules.

    Parameters
    ----------
    candidates : pandas.DataFrame
        One row per selectable player with 'player_score', 'now_cost',
        'element_type' and 'team' columns. Row labels must be unique.
    problem_name : str
        Name given to the PuLP problem.
    squad_size : int
        Exact number of players to pick.
    position_limits : dict
        Maps an 'element_type' value to a (minimum, maximum) number of picks.
    budget : number
        Upper bound on the summed 'now_cost' of the picks.
    max_per_team : int
        Upper bound on the picks sharing one 'team' value.

    Returns
    -------
    (pulp.LpProblem, dict)
        The solved problem and the row-label -> binary decision variable mapping.

    Raises
    ------
    RuntimeError
        If the solver does not report an optimal solution; the variable values
        would not describe a valid squad in that case.

    NOTE(review): a player that appears on several rows of `candidates` gets one
    variable per row and could therefore be picked more than once - confirm
    whether duplicate player rows can occur.
    """
    model = pulp.LpProblem(problem_name, pulp.LpMaximize)
    # One binary decision variable per candidate row (0 if not picked, 1 if picked)
    picks = pulp.LpVariable.dicts("player", candidates.index, cat='Binary')

    # Plain dict lookups avoid a DataFrame .loc call for every coefficient
    scores = candidates['player_score'].to_dict()
    costs = candidates['now_cost'].to_dict()

    # Objective: maximise the total player_score of the picks
    model += pulp.lpSum([scores[i] * picks[i] for i in candidates.index])
    # Total budget constraint
    model += pulp.lpSum([costs[i] * picks[i] for i in candidates.index]) <= budget
    # Total players constraint
    model += pulp.lpSum([picks[i] for i in candidates.index]) == squad_size

    # Formation / position constraints
    for position, (fewest, most) in position_limits.items():
        position_rows = candidates[candidates['element_type'] == position].index
        in_position = [picks[i] for i in position_rows]
        if fewest == most:
            model += pulp.lpSum(in_position) == fewest
        else:
            model += pulp.lpSum(in_position) >= fewest
            model += pulp.lpSum(in_position) <= most

    # Players-per-team constraint
    for team_name in candidates['team'].unique():
        team_rows = candidates[candidates['team'] == team_name].index
        model += pulp.lpSum([picks[i] for i in team_rows]) <= max_per_team

    model.solve()
    status = pulp.LpStatus[model.status]
    if status != 'Optimal':
        raise RuntimeError(f"{problem_name}: solver finished with status '{status}', no valid squad found")
    return model, picks


# Best 11 - kept under its own names so the 15-man model below does not overwrite it
STARTING_XI_LIMITS = {"Goalkeeper": (1, 1), "Defender": (3, 5), "Midfielder": (3, 5), "Forward": (1, 3)}
best_11_prob, best_11_vars = _solve_squad(final_df, "FPL_Optimal_11", 11, STARTING_XI_LIMITS)

# 15 man squad - `prob` / `player_vars` end up holding this model
FULL_SQUAD_LIMITS = {"Goalkeeper": (2, 2), "Defender": (5, 5), "Midfielder": (5, 5), "Forward": (3, 3)}
teams = final_df['team'].unique()
prob, player_vars = _solve_squad(final_df, "FPL_Optimal_15", 15, FULL_SQUAD_LIMITS)

pulp.LpStatus[prob.status]

In [None]:
# Rows whose decision variable was switched on by the most recently solved model.
# Solver output is floating point, so a binary "1" may come back as e.g. 0.9999999,
# and a variable that was never solved has varValue None. Thresholding at 0.5 handles
# both cases, unlike an exact `== 1` comparison.
best_idx = [i for i in final_df.index if (player_vars[i].varValue or 0) > 0.5]
best = final_df.loc[best_idx]
display(best)
print(f"Total Cost: {best['now_cost'].sum()} | Total Score: {best['player_score'].sum()}")

Goalkeepers -> Influence -> Reflects saves and bonus point potential. Threat is almost irrelevant.

Defenders -> Creativity + Influence	-> Reflects clean sheet potential and assist potential from crosses.

Midfielders	-> Creativity + Threat -> The "Double Threat"—they can score or assist.

Forwards -> Threat -> They are there to finish.