In [1163]:
from sklearn.preprocessing import StandardScaler, LabelEncoder, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.feature_selection import VarianceThreshold
import numpy as np
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.cluster import KMeans


In [1164]:
import pandas as pd
# Remove the limit on the number of columns displayed
pd.set_option('display.max_columns', None)

In [1165]:
import requests

def get_current_gameweek():
    # FPL API endpoint
    url = "https://fantasy.premierleague.com/api/bootstrap-static/"
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        # Iterate through gameweeks
        for event in data['events']:
            if event['is_current']:
                return event['id']  # Current gameweek number
    else:
        print("Failed to fetch data from FPL API")
        return None

# Example usage
current_gameweek = get_current_gameweek()
if current_gameweek:
    print(f"The current gameweek is: {current_gameweek}")


The current gameweek is: 19


In [1166]:
fpl_players_df = pd.read_csv("fpl_historical_data.csv")

In [1167]:
fpl_players_df.tail()

Unnamed: 0,gameweek,id,web_name,team,position,can_transact,can_select,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,dreamteam_count,ep_next,ep_this,event_points,first_name,form,in_dreamteam,news,news_added,now_cost,photo,points_per_game,removed,second_name,selected_by_percent,special,squad_number,status,team_code,total_points,transfers_in,transfers_in_event,transfers_out,transfers_out_event,value_form,value_season,region,team_join_date,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,expected_goals_per_90,expected_assists_per_90,expected_goal_involvements_per_90,expected_goals_conceded_per_90,goals_conceded_per_90,form_rank,form_rank_type,points_per_game_rank,points_per_game_rank_type,selected_rank,selected_rank_type,starts_per_90,clean_sheets_per_90,direct_freekicks_order,penalties_order,element_type
12801,19,705,Arthur,Brentford,Defender,True,True,,,622536,0,0,0,0,0,0.5,-0.5,0,Benjamin,0.0,False,,,4.0,622536.jpg,0.0,False,Arthur,0.0,False,,a,94,0,281,116,40,27,0.0,0.0,241.0,2024-03-08,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,499,197,569,217,682,234,0.0,0.0,,,2
12802,19,706,Abbott,Nott'm Forest,Defender,True,True,,,549067,0,0,0,0,0,1.2,1.2,0,Zach,0.0,False,,,4.0,549067.jpg,0.0,False,Abbott,0.0,False,,a,17,0,160,160,24,24,0.0,0.0,241.0,2022-08-22,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,484,183,554,203,695,236,0.0,0.0,,,2
12803,19,707,Rees-Dottin,Bournemouth,Forward,True,True,,,606774,0,0,0,0,0,1.1,1.1,0,Remy,0.0,False,,,4.5,606774.jpg,0.0,False,Rees-Dottin,0.0,False,,a,91,0,1112,1112,201,201,0.0,0.0,241.0,2024-12-28,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,626,60,670,65,646,75,0.0,0.0,,,4
12804,19,708,Wilson-Brown,Leicester,Defender,True,True,,,532535,0,0,0,0,0,1.2,0.7,0,Thomas,0.0,False,,,4.0,532535.jpg,0.0,False,Wilson-Brown,0.0,False,,a,13,0,48,48,2,2,0.0,0.0,241.0,2022-07-08,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,479,178,549,198,704,238,0.0,0.0,,,2
12805,19,709,Danns,Liverpool,Forward,True,True,,,500058,0,0,0,0,0,2.1,2.1,0,Jayden,0.0,False,,,4.5,500058.jpg,0.0,False,Danns,0.0,False,,a,14,0,245,245,40,40,0.0,0.0,241.0,2024-02-20,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,591,53,639,59,692,78,0.0,0.0,,,4


In [1168]:
fpl_fixtures_df = pd.read_csv("fpl_historical_fixtures.csv")

In [1169]:
fpl_fixtures_df.head()

Unnamed: 0,gameweek,code,event,finished,finished_provisional,id,kickoff_time,minutes,provisional_start_time,started,team_a,team_a_score,team_h,team_h_score,stats,team_h_difficulty,team_a_difficulty,pulse_id
0,1,2444470,1,True,True,1,2024-08-16T19:00:00Z,90,False,True,9,0.0,14,1.0,"[{'identifier': 'goals_scored', 'a': [], 'h': ...",3,3,115827
1,1,2444473,1,True,True,4,2024-08-17T11:30:00Z,90,False,True,12,2.0,10,0.0,"[{'identifier': 'goals_scored', 'a': [{'value'...",5,2,115830
2,1,2444471,1,True,True,2,2024-08-17T14:00:00Z,90,False,True,20,0.0,1,2.0,"[{'identifier': 'goals_scored', 'a': [], 'h': ...",2,5,115828
3,1,2444472,1,True,True,3,2024-08-17T14:00:00Z,90,False,True,5,3.0,8,0.0,"[{'identifier': 'goals_scored', 'a': [{'value'...",3,3,115829
4,1,2444474,1,True,True,5,2024-08-17T14:00:00Z,90,False,True,17,0.0,15,1.0,"[{'identifier': 'goals_scored', 'a': [], 'h': ...",2,4,115831


 Add team names

In [1171]:
# # Map team names back to their IDs
# team_mapping = {
#         1: 'Arsenal', 2: 'Aston Villa', 3: 'Bournemouth', 4: 'Brentford',
#         5: 'Brighton', 6: 'Chelsea', 7: 'Crystal Palace', 8: 'Everton',
#         9: 'Fulham', 10: 'Ipswich Town', 11: 'Leicester City', 12: 'Liverpool',
#         13: 'Manchester City', 14: 'Manchester Utd', 15: 'Newcastle Utd', 16: 'Nottingham Forest',
#         17: 'Southampton', 18: 'Tottenham', 19: 'West Ham', 20: 'Wolves'
#     }
# # Add Team 1 and Team 2 columns by mapping team_h and team_a using the team_mapping dictionary
# fpl_fixtures_df['Team 1'] = fpl_fixtures_df['team_h'].map(team_mapping)
# fpl_fixtures_df['Team 2'] = fpl_fixtures_df['team_a'].map(team_mapping)


Get data separately

In [1173]:
# Define the range of gameweeks
gameweeks = range(1, current_gameweek+1)  # Adjust as per the current number of gameweeks

# Initialize dictionaries to store gameweek-specific DataFrames
fixtures_dfs = {}
players_dfs = {}

# Loop through each gameweek and create DataFrames
for gw in gameweeks:
    # Filter fixtures for the current gameweek
    fixtures_dfs[gw] = fpl_fixtures_df[fpl_fixtures_df['gameweek'] == gw]
    
    # Filter players for the current gameweek
    players_dfs[gw] = fpl_players_df[fpl_players_df['gameweek'] == gw]

    # Optionally, save to CSV files if needed
    fixtures_dfs[gw].to_csv(f'fixtures_df{gw}.csv', index=False)
    players_dfs[gw].to_csv(f'fpl_players_df{gw}.csv', index=False)

# Example: Accessing a specific gameweek DataFrame
fixtures_df1 = fixtures_dfs[1]  # Fixtures for Gameweek 1
players_df1 = players_dfs[1]    # Players for Gameweek 1

# Print summary for confirmation
print(f"Created {len(fixtures_dfs)} fixtures DataFrames and {len(players_dfs)} players DataFrames.")


Created 19 fixtures DataFrames and 19 players DataFrames.


Process scores through function

In [1175]:
def process_gameweek_data(fixtures_df, players_df):
    def calculate_average_fdr():
        remaining_fixtures = fixtures_df[fixtures_df['finished'] == True]
        fdr_values = []
        remaining_fixtures = remaining_fixtures.sort_values(by='kickoff_time')
        for team in set(remaining_fixtures['team_h']).union(set(remaining_fixtures['team_a'])):
            team_fixtures = remaining_fixtures[
                (remaining_fixtures['team_h'] == team) | (remaining_fixtures['team_a'] == team)
            ].head(1) 
            for idx, fixture in team_fixtures.iterrows():
                if fixture['team_h'] == team:
                    fdr_values.append({'team': team, 'fdr': fixture['team_h_difficulty']})
                elif fixture['team_a'] == team:
                    fdr_values.append({'team': team, 'fdr': fixture['team_a_difficulty']})
        fdr_df = pd.DataFrame(fdr_values)
        average_fdr = fdr_df.groupby('team')['fdr'].mean().reset_index()
        average_fdr.columns = ['team_id', 'average_fdr']
        return average_fdr
    
    def add_average_fdr(players_df):
            # Get average FDR for each team
            average_fdr = calculate_average_fdr()
        
            # Map team names back to their IDs
            team_mapping = {
                1: 'Arsenal', 2: 'Aston Villa', 3: 'Bournemouth', 4: 'Brentford',
                5: 'Brighton', 6: 'Chelsea', 7: 'Crystal Palace', 8: 'Everton',
                9: 'Fulham', 10: 'Ipswich', 11: 'Leicester', 12: 'Liverpool',
                13: 'Man City', 14: 'Man Utd', 15: 'Newcastle', 16: "Nott'm Forest",
                17: 'Southampton', 18: 'Spurs', 19: 'West Ham', 20: 'Wolves',
            }
            
            # Map team names
            average_fdr['team_name'] = average_fdr['team_id'].map(team_mapping)
            # Original team mapping
            
            # Reverse the mapping
            reversed_team_mapping = {name: id_ for id_, name in team_mapping.items()}
            
            # Map the 'team' column
            players_df['team_id'] = players_df['team'].map(reversed_team_mapping)
    
            # Invert the average FDR
            max_fdr = average_fdr['average_fdr'].max()
            average_fdr['adjusted_fdr'] = 7 - average_fdr['average_fdr']
            # average_fdr.drop(columns = ['web_name'])
    
            players_df = players_df.merge(
            average_fdr[['team_id', 'adjusted_fdr', 'average_fdr']],
            how='left',
            on='team_id',
            suffixes=('', '_avg')
            )
                # Check for and drop any duplicates
            players_df = players_df.loc[:, ~players_df.columns.duplicated()]
    
            return players_df
        
    irrelevant_columns_players = [
        'squad_number',
        'region',
        'corners_and_indirect_freekicks_order',
        'chance_of_playing_this_round',
        'news_added',
        'chance_of_playing_next_round',
        'code', 'first_name', 'second_name', 'photo', 'news','direct_freekicks_text',
        'penalties_text','transfers_in','transfers_out'
    ]
    players_df_cleaned = players_df.drop(columns=irrelevant_columns_players, errors='ignore')
    players_df_cleaned['direct_freekicks_order'] = players_df_cleaned['direct_freekicks_order'].fillna(0)
    players_df_cleaned['penalties_order'] = players_df_cleaned['penalties_order'].fillna(0)
    players_df = players_df[~players_df['status'].isin(['i', 'u'])]
    players_df_cleaned = players_df_cleaned[~players_df_cleaned['status'].isin(['i', 'u'])]
    missing_scores_df = fixtures_df[(fixtures_df['team_h_score'].isnull()) | (fixtures_df['team_a_score'].isnull())]
    players_df_cleaned['value'] = players_df_cleaned['total_points'] / players_df_cleaned['now_cost']
    if 'status' in players_df.columns:
        le = LabelEncoder()
        players_df_cleaned['status'] = le.fit_transform(players_df_cleaned['status'])
    object_bool_columns = players_df_cleaned.select_dtypes(include=['object', 'bool'])
    bool_columns = players_df_cleaned.select_dtypes(include=['bool']).columns
    players_df_cleaned[bool_columns] = players_df_cleaned[bool_columns].astype(int)
    object_columns = players_df_cleaned.select_dtypes(include=['object']).columns
    players_df_cleaned[object_columns] = players_df_cleaned[object_columns].apply(pd.to_numeric, errors='coerce')
    data = players_df_cleaned.select_dtypes(include=['float64', 'int64']) 
    variances = data.var()
    sorted_variances = variances.sort_values(ascending=False)
    selector = VarianceThreshold(threshold=0)
    selector.fit(data)
    high_variance_columns = data.columns[selector.get_support()]
    high_variance_columns = [col for col in high_variance_columns if col != 'id']
    data = data[high_variance_columns]
    high_variance_sorted = sorted_variances[high_variance_columns].sort_values(ascending=False)
    data = data[high_variance_sorted.index]
    missing_cols = data.columns[data.isnull().any()]
    # Filter 'data' to align with 'players_df'
    data = data.loc[players_df.index]
    
    scaler = MinMaxScaler()
    data_scaled = scaler.fit_transform(data)
    data_scaled_df = pd.DataFrame(data_scaled, columns=data.columns)
    # Align data_scaled_df with players_df
    data_scaled_df = data_scaled_df.set_index(players_df.index)
        
    pca = PCA(n_components=2)  
    data_pca = pca.fit_transform(data_scaled_df)
    explained_variance = pca.explained_variance_ratio_
    explained_variance_pc1 = round(pca.explained_variance_ratio_[0],2)  
    explained_variance_pc2 = round(pca.explained_variance_ratio_[1],2)
    # Create aligned data_pca with the same index as players_df
    data_pca = pd.DataFrame(data=data_pca, index=players_df.index, columns=['PC1', 'PC2'])
    
    # Create pca_df with aligned indices
    pca_df = pd.DataFrame(data_pca)
    
    pca_df['Total Points'] = players_df['total_points']
    pca_df['web_name'] = players_df['web_name']
    pca_df['team'] = players_df['team']
    # Handle missing values in pca_df
    pca_df['Total Points'] = pca_df['Total Points'].fillna(0)
    pca_df['web_name'] = pca_df['web_name'].fillna(players_df['web_name'])
    pca_df['team'] = pca_df['team'].fillna(players_df['team'])
    # Continue with PCA regression and scoring
    pca_df_cleaned = pca_df.copy()
    pca_df_cleaned['PC1'] = pca_df_cleaned['PC1'].fillna(0)
    pca_df_cleaned['Total Points'] = pca_df_cleaned['Total Points'].fillna(0)
    
    X = pca_df_cleaned[['PC1']]
    y = pca_df_cleaned['Total Points']
    poly = PolynomialFeatures(degree=2)
    X_poly = poly.fit_transform(X)
    poly_model = LinearRegression()
    poly_model.fit(X_poly, y)
    poly_predictions = poly_model.predict(X_poly)
    poly_r2 = r2_score(y, poly_predictions)
    linear_model = LinearRegression()
    linear_model.fit(X, y)
    linear_predictions = linear_model.predict(X)
    linear_r2 = r2_score(y, linear_predictions)
    threshold = 0.01  
    non_linearity = 1 if (poly_r2 - linear_r2 > threshold) else 0
    non_linearity_pc1 = 1 if poly_r2 > linear_r2 else 0
    # Calculate Scores based on non-linearity
    if non_linearity_pc1 == 1:
        print("here",non_linearity_pc1)
        # PC1 is non-linear; use polynomial features
        poly = PolynomialFeatures(degree=2)  # Adjust degree as needed
        poly_features = poly.fit_transform(pca_df[['PC1']])  # This will include the constant term
    
        # Add polynomial features to the DataFrame
        pca_df[['PC1', 'PC1^2']] = poly_features[:, 1:]  # Ignore the first column (constant)
    
        # Calculate Score using polynomial feature of PC1 and original PC2
        players_df['Score'] = (explained_variance_pc1 * pca_df['PC1^2']) + (explained_variance_pc2 * pca_df['PC2'])
    else:
        # PC1 is linear; use original PC1
        players_df['Score'] = (explained_variance_pc1 * pca_df['PC1']) + (explained_variance_pc2 * pca_df['PC2'])

    inertia = []  # To store the inertia values
    k_values = range(1, 11)  # Adjust the range based on your needs
    for k in k_values:
        kmeans = KMeans(n_clusters=k, n_init = 10, random_state=42)  # Using random_state for reproducibility
        kmeans.fit(data_scaled_df)  # Fit the model
        inertia.append(kmeans.inertia_)  # Store the inertia value
    optimal_k = 2
    kmeans = KMeans(n_clusters=optimal_k, random_state=42)  # Using random_state for reproducibility
    if non_linearity_pc1:
        kmeans.fit(pca_df[['PC1','PC1^2', 'PC2']])  # Fit the model
    else:
        kmeans.fit(pca_df[['PC1', 'PC2']])  # Fit the model
    
    # Add cluster labels to your original DataFrame
    pca_df['Cluster'] = kmeans.labels_
    
    pca_df['web_name'] = players_df['web_name']  # Add the player names to the PCA DataFrame
    print("hello",pca_df[pca_df['web_name'].isin(players_df[players_df['team'] == "Wolves"]['web_name'])])


    players_with_clusters = players_df.merge(pca_df[['web_name', 'Cluster', 'PC1']], on='web_name', how='left')
    print( players_with_clusters.head())
    # Calculate the average 'PC1' value for each cluster
    cluster_avg = players_with_clusters.groupby('Cluster')['PC1'].mean()
    
    # Identify the worst cluster (lowest average 'PC1')
    worst_cluster = cluster_avg.idxmin()
    
    # Filter out players from the worst cluster
    players_df_filtered = players_with_clusters[players_with_clusters['Cluster'] != worst_cluster]


    players_df_filtered = add_average_fdr(players_df_filtered)
    weight = 2
    players_df_filtered['adjusted_score'] = players_df_filtered['Score'] + (weight * players_df_filtered['adjusted_fdr'])
    players_df_filtered_sorted = players_df_filtered.sort_values(by='Score', ascending=False)
    position_mapping = {1: 'goalkeeper', 2: 'defender', 3: 'midfielder', 4: 'forward'}
    players_df_filtered_sorted['position'] = players_df_filtered_sorted['element_type'].map(position_mapping)
    players_df_filtered_sorted = add_average_fdr(players_df_filtered_sorted)

  

    return players_df_filtered_sorted


In [1176]:
# Initialize results_df as an empty dictionary or DataFrame
results_df = {}  # Use a dictionary to store results for each gameweek

# Loop through all gameweeks
for gw in range(1, current_gameweek+1):  # Adjust range as needed for all gameweeks
    fixtures_dfs[gw].to_csv(f'fixtures_df{gw}.csv', index=False)
    players_dfs[gw].to_csv(f'fpl_players_df{gw}.csv', index=False)
    
    fixtures_df = pd.read_csv(f'fixtures_df{gw}.csv')  # Get fixtures DataFrame for the gameweek
    players_df = pd.read_csv(f'fpl_players_df{gw}.csv')   # Get players DataFrame for the gameweek    
    # Call the function for the gameweek
    results_df[gw] = process_gameweek_data(fixtures_df, players_df)
    

# After the loop, you can combine the results into a single DataFrame if needed
final_results_df = pd.concat(results_df.values(), ignore_index=True)
# Replace NaN values in the 'adjusted_score' column with 0
final_results_df['adjusted_score'] = final_results_df['adjusted_score'].fillna(0)


  self.variances_ = np.nanvar(X, axis=0)
  self.variances_ = np.nanmin(compare_arr, axis=0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['Score'] = (explained_variance_pc1 * pca_df['PC1^2']) + (explained_variance_pc2 * pca_df['PC2'])


here 1
hello           PC1       PC2  Total Points       web_name    team     PC1^2  \
203 -0.390510 -0.636643            12      Johnstone  Wolves  0.152498   
532  1.355891 -0.363268            56      Aït-Nouri  Wolves  1.838439   
534 -0.093888  0.090988            34     Bellegarde  Wolves  0.008815   
535 -1.028529 -0.378638             0        Bentley  Wolves  1.057872   
538 -1.096755  0.225496             0        Chiwome  Wolves  1.202871   
539 -1.379027  0.246655             0         Cundle  Wolves  1.901714   
540  2.253470  0.908467           111          Cunha  Wolves  5.078128   
541 -0.438750 -0.334327            17         Dawson  Wolves  0.192502   
542  0.146748 -0.206923            27        Doherty  Wolves  0.021535   
543 -0.749670  0.241579            20          Doyle  Wolves  0.562005   
547  0.074612  0.212586            43         Guedes  Wolves  0.005567   
549 -0.342090  0.188609            25       Hee Chan  Wolves  0.117026   
552  0.908171 -0.241611  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['team_id'] = players_df['team'].map(reversed_team_mapping)
  self.variances_ = np.nanvar(X, axis=0)
  self.variances_ = np.nanmin(compare_arr, axis=0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['Score'] = (explained_variance_pc1 * pca_df['PC1^2']) + (explained_variance_pc2 * pca_df['PC2'])


here 1
hello           PC1       PC2  Total Points       web_name    team     PC1^2  \
203 -0.389885 -0.645999            12      Johnstone  Wolves  0.152011   
532  1.357437 -0.362544            56      Aït-Nouri  Wolves  1.842636   
534 -0.089442  0.115876            34     Bellegarde  Wolves  0.008000   
535 -1.028825 -0.406403             0        Bentley  Wolves  1.058482   
538 -1.093643  0.208954             0        Chiwome  Wolves  1.196054   
539 -1.375673  0.251121             0         Cundle  Wolves  1.892475   
540  2.258285  0.892642           111          Cunha  Wolves  5.099850   
541 -0.435039 -0.309356            17         Dawson  Wolves  0.189259   
542  0.148096 -0.208753            27        Doherty  Wolves  0.021932   
543 -0.745922  0.256119            20          Doyle  Wolves  0.556400   
547  0.077145  0.208863            43         Guedes  Wolves  0.005951   
549 -0.339033  0.184206            25       Hee Chan  Wolves  0.114943   
552  0.912090 -0.216959  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['team_id'] = players_df['team'].map(reversed_team_mapping)
  self.variances_ = np.nanvar(X, axis=0)
  self.variances_ = np.nanmin(compare_arr, axis=0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['Score'] = (explained_variance_pc1 * pca_df['PC1^2']) + (explained_variance_pc2 * pca_df['PC2'])


here 1
hello           PC1       PC2  Total Points       web_name    team     PC1^2  \
203 -0.347626 -0.679024            12      Johnstone  Wolves  0.120844   
532  1.395323 -0.328499            56      Aït-Nouri  Wolves  1.946927   
534 -0.055736  0.190396            34     Bellegarde  Wolves  0.003106   
535 -0.989123 -0.504277             0        Bentley  Wolves  0.978364   
538 -1.058870  0.131019             0        Chiwome  Wolves  1.121206   
539 -1.345158  0.244564             0         Cundle  Wolves  1.809450   
540  2.281211  0.894801           111          Cunha  Wolves  5.203925   
541 -0.402144 -0.231019            17         Dawson  Wolves  0.161719   
542  0.187545 -0.206814            27        Doherty  Wolves  0.035173   
543 -0.712742  0.285506            20          Doyle  Wolves  0.508002   
547  0.113724  0.193039            43         Guedes  Wolves  0.012933   
549 -0.300500  0.151565            25       Hee Chan  Wolves  0.090300   
552  0.949769 -0.124544  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['team_id'] = players_df['team'].map(reversed_team_mapping)
  self.variances_ = np.nanvar(X, axis=0)
  self.variances_ = np.nanmin(compare_arr, axis=0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['Score'] = (explained_variance_pc1 * pca_df['PC1^2']) + (explained_variance_pc2 * pca_df['PC2'])


here 1
hello           PC1       PC2  Total Points       web_name    team     PC1^2  \
203 -0.326086 -0.699572            12      Johnstone  Wolves  0.106332   
532  1.411639 -0.289287            56      Aït-Nouri  Wolves  1.992724   
534 -0.042011  0.258584            34     Bellegarde  Wolves  0.001765   
535 -0.968047 -0.584209             0        Bentley  Wolves  0.937115   
538 -1.040404  0.058363             0        Chiwome  Wolves  1.082440   
539 -1.332264  0.237365             0         Cundle  Wolves  1.774927   
540  2.291929  0.877011           111          Cunha  Wolves  5.252940   
541 -0.387742 -0.154756            17         Dawson  Wolves  0.150344   
542  0.205944 -0.205519            27        Doherty  Wolves  0.042413   
543 -0.698762  0.306517            20          Doyle  Wolves  0.488269   
547  0.130858  0.168452            43         Guedes  Wolves  0.017124   
549 -0.282809  0.117100            25       Hee Chan  Wolves  0.079981   
552  0.965294 -0.033055  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['team_id'] = players_df['team'].map(reversed_team_mapping)
  self.variances_ = np.nanvar(X, axis=0)
  self.variances_ = np.nanmin(compare_arr, axis=0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['Score'] = (explained_variance_pc1 * pca_df['PC1^2']) + (explained_variance_pc2 * pca_df['PC2'])


here 1
hello           PC1       PC2  Total Points       web_name    team     PC1^2  \
203 -0.320561 -0.700947            12      Johnstone  Wolves  0.102759   
532  1.416462 -0.259830            56      Aït-Nouri  Wolves  2.006365   
534 -0.037595  0.307119            34     Bellegarde  Wolves  0.001413   
535 -0.962636 -0.627376             0        Bentley  Wolves  0.926669   
538 -1.035068  0.016174             0        Chiwome  Wolves  1.071366   
539 -1.328093  0.239037             0         Cundle  Wolves  1.763830   
540  2.295744  0.852953           111          Cunha  Wolves  5.270441   
541 -0.383587 -0.094995            17         Dawson  Wolves  0.147139   
542  0.211165 -0.197446            27        Doherty  Wolves  0.044591   
543 -0.694353  0.326512            20          Doyle  Wolves  0.482126   
547  0.135963  0.156526            43         Guedes  Wolves  0.018486   
549 -0.277392  0.100451            25       Hee Chan  Wolves  0.076946   
552  0.970224  0.031026  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['team_id'] = players_df['team'].map(reversed_team_mapping)
  self.variances_ = np.nanvar(X, axis=0)
  self.variances_ = np.nanmin(compare_arr, axis=0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['Score'] = (explained_variance_pc1 * pca_df['PC1^2']) + (explained_variance_pc2 * pca_df['PC2'])


here 1
hello           PC1       PC2  Total Points       web_name    team     PC1^2  \
203 -0.313104 -0.700850            12      Johnstone  Wolves  0.098034   
532  1.423803 -0.256633            56      Aït-Nouri  Wolves  2.027216   
534 -0.029777  0.313021            34     Bellegarde  Wolves  0.000887   
535 -0.955739 -0.632544             0        Bentley  Wolves  0.913437   
538 -1.027079  0.010320             0        Chiwome  Wolves  1.054890   
539 -1.320919  0.238719             0         Cundle  Wolves  1.744828   
540  2.302502  0.848550           111          Cunha  Wolves  5.301514   
541 -0.375986 -0.087952            17         Dawson  Wolves  0.141365   
542  0.218745 -0.196140            27        Doherty  Wolves  0.047849   
543 -0.686862  0.328982            20          Doyle  Wolves  0.471780   
547  0.143655  0.155119            43         Guedes  Wolves  0.020637   
549 -0.269466  0.098199            25       Hee Chan  Wolves  0.072612   
552  0.978451  0.038526  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['team_id'] = players_df['team'].map(reversed_team_mapping)
  self.variances_ = np.nanvar(X, axis=0)
  self.variances_ = np.nanmin(compare_arr, axis=0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['Score'] = (explained_variance_pc1 * pca_df['PC1^2']) + (explained_variance_pc2 * pca_df['PC2'])


here 1
hello           PC1       PC2  Total Points       web_name    team     PC1^2  \
203 -0.308366 -0.701371            12      Johnstone  Wolves  0.095090   
532  1.428741 -0.258912            56      Aït-Nouri  Wolves  2.041300   
534 -0.024777  0.308413            34     Bellegarde  Wolves  0.000614   
535 -0.951628 -0.629360             0        Bentley  Wolves  0.905595   
538 -1.023958  0.014693             0        Chiwome  Wolves  1.048489   
539 -1.316399  0.238252             0         Cundle  Wolves  1.732907   
540  2.306077  0.852066           111          Cunha  Wolves  5.317989   
541 -0.370525 -0.093640            17         Dawson  Wolves  0.137289   
542  0.223537 -0.197334            27        Doherty  Wolves  0.049969   
543 -0.682218  0.326693            20          Doyle  Wolves  0.465421   
547  0.147937  0.155950            43         Guedes  Wolves  0.021885   
549 -0.265015  0.099372            25       Hee Chan  Wolves  0.070233   
552  0.983653  0.033097  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['team_id'] = players_df['team'].map(reversed_team_mapping)
  self.variances_ = np.nanvar(X, axis=0)
  self.variances_ = np.nanmin(compare_arr, axis=0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['Score'] = (explained_variance_pc1 * pca_df['PC1^2']) + (explained_variance_pc2 * pca_df['PC2'])


here 1
hello           PC1       PC2  Total Points       web_name    team     PC1^2  \
203 -0.303856 -0.703879            12      Johnstone  Wolves  0.092329   
532  1.431566 -0.253827            56      Aït-Nouri  Wolves  2.049382   
534 -0.021525  0.310119            34     Bellegarde  Wolves  0.000463   
535 -0.947354 -0.636265             0        Bentley  Wolves  0.897481   
538 -1.020943  0.008042             0        Chiwome  Wolves  1.042325   
539 -1.314000  0.236370             0         Cundle  Wolves  1.726596   
540  2.307268  0.853702           111          Cunha  Wolves  5.323487   
541 -0.367737 -0.087975            17         Dawson  Wolves  0.135231   
542  0.227767 -0.200122            27        Doherty  Wolves  0.051878   
543 -0.678667  0.323870            20          Doyle  Wolves  0.460589   
547  0.151909  0.150291            43         Guedes  Wolves  0.023076   
549 -0.261248  0.093421            25       Hee Chan  Wolves  0.068250   
552  0.986541  0.039507  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['team_id'] = players_df['team'].map(reversed_team_mapping)
  self.variances_ = np.nanvar(X, axis=0)
  self.variances_ = np.nanmin(compare_arr, axis=0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['Score'] = (explained_variance_pc1 * pca_df['PC1^2']) + (explained_variance_pc2 * pca_df['PC2'])


here 1
hello           PC1       PC2  Total Points       web_name    team     PC1^2  \
203 -0.298763 -0.700656            12      Johnstone  Wolves  0.089259   
532  1.438089 -0.234712            56      Aït-Nouri  Wolves  2.068100   
534 -0.013078  0.347201            34     Bellegarde  Wolves  0.000171   
535 -0.943136 -0.665635             0        Bentley  Wolves  0.889506   
538 -1.014046 -0.022350             0        Chiwome  Wolves  1.028290   
539 -1.306361  0.239608             0         Cundle  Wolves  1.706580   
540  2.314685  0.819022           111          Cunha  Wolves  5.357767   
541 -0.360129 -0.040083            17         Dawson  Wolves  0.129693   
542  0.234435 -0.192724            27        Doherty  Wolves  0.054960   
543 -0.670601  0.340043            20          Doyle  Wolves  0.449706   
547  0.159351  0.140971            43         Guedes  Wolves  0.025393   
549 -0.253669  0.082070            25       Hee Chan  Wolves  0.064348   
552  0.994919  0.087899  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['team_id'] = players_df['team'].map(reversed_team_mapping)
  self.variances_ = np.nanvar(X, axis=0)
  self.variances_ = np.nanmin(compare_arr, axis=0)


here 1


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['Score'] = (explained_variance_pc1 * pca_df['PC1^2']) + (explained_variance_pc2 * pca_df['PC2'])


hello           PC1       PC2  Total Points       web_name    team     PC1^2  \
203 -0.287211 -0.704699            12      Johnstone  Wolves  0.082490   
532  1.445054 -0.242227            56      Aït-Nouri  Wolves  2.088181   
534  0.002488  0.311891            34     Bellegarde  Wolves  0.000006   
535 -0.935143 -0.642997             0        Bentley  Wolves  0.874492   
538 -1.008935  0.007856             0        Chiwome  Wolves  1.017949   
539 -1.297625  0.242227             0         Cundle  Wolves  1.683831   
540  2.318279  0.851679           111          Cunha  Wolves  5.374418   
541 -0.347875 -0.072470            17         Dawson  Wolves  0.121017   
542  0.248093 -0.206759            27        Doherty  Wolves  0.061550   
543 -0.654636  0.318069            20          Doyle  Wolves  0.428548   
547  0.173126  0.137842            43         Guedes  Wolves  0.029973   
549 -0.242437  0.086191            25       Hee Chan  Wolves  0.058776   
552  1.006066  0.053978         

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['team_id'] = players_df['team'].map(reversed_team_mapping)
  self.variances_ = np.nanvar(X, axis=0)
  self.variances_ = np.nanmin(compare_arr, axis=0)


here 1


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['Score'] = (explained_variance_pc1 * pca_df['PC1^2']) + (explained_variance_pc2 * pca_df['PC2'])


hello           PC1       PC2  Total Points       web_name    team     PC1^2  \
203 -0.275325 -0.706915            12      Johnstone  Wolves  0.075804   
532  1.455281 -0.231995            56      Aït-Nouri  Wolves  2.117842   
534  0.011400  0.321388            34     Bellegarde  Wolves  0.000130   
535 -0.923386 -0.657241             0        Bentley  Wolves  0.852643   
538 -0.998942 -0.008347             0        Chiwome  Wolves  0.997885   
539 -1.288633  0.238012             0         Cundle  Wolves  1.660574   
540  2.325463  0.848802           111          Cunha  Wolves  5.407778   
541 -0.338205 -0.058566            17         Dawson  Wolves  0.114383   
542  0.258617 -0.206507            27        Doherty  Wolves  0.066883   
543 -0.645851  0.318653            20          Doyle  Wolves  0.417124   
547  0.182845  0.131683            43         Guedes  Wolves  0.033432   
549 -0.232005  0.077367            25       Hee Chan  Wolves  0.053827   
552  1.015926  0.070178         

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['team_id'] = players_df['team'].map(reversed_team_mapping)
  self.variances_ = np.nanvar(X, axis=0)
  self.variances_ = np.nanmin(compare_arr, axis=0)


here 1


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['Score'] = (explained_variance_pc1 * pca_df['PC1^2']) + (explained_variance_pc2 * pca_df['PC2'])


hello           PC1       PC2  Total Points       web_name    team     PC1^2  \
203 -0.253171 -0.709845            12      Johnstone  Wolves  0.064095   
532  1.471744 -0.198831            56      Aït-Nouri  Wolves  2.166030   
534  0.023925  0.351737            34     Bellegarde  Wolves  0.000572   
535 -0.899924 -0.698617             0        Bentley  Wolves  0.809863   
538 -0.977788 -0.061004             0        Chiwome  Wolves  0.956070   
539 -1.274650  0.225195             0         Cundle  Wolves  1.624732   
540  2.336592  0.834926           111          Cunha  Wolves  5.459662   
541 -0.324865 -0.013161            17         Dawson  Wolves  0.105538   
542  0.276630 -0.203098            27        Doherty  Wolves  0.076524   
543 -0.632591  0.321504            20          Doyle  Wolves  0.400172   
547  0.199864  0.113332            43         Guedes  Wolves  0.039946   
549 -0.213370  0.050781            25       Hee Chan  Wolves  0.045527   
552  1.030599  0.121140         

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['team_id'] = players_df['team'].map(reversed_team_mapping)
  self.variances_ = np.nanvar(X, axis=0)
  self.variances_ = np.nanmin(compare_arr, axis=0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['Score'] = (explained_variance_pc1 * pca_df['PC1^2']) + (explained_variance_pc2 * pca_df['PC2'])


here 1
hello           PC1       PC2  Total Points       web_name    team     PC1^2  \
203 -0.233891 -0.713046            12      Johnstone  Wolves  0.054705   
532  1.485761 -0.185154            56      Aït-Nouri  Wolves  2.207485   
534  0.039882  0.349719            34     Bellegarde  Wolves  0.001591   
535 -0.881317 -0.708851             0        Bentley  Wolves  0.776719   
538 -0.961096 -0.074818             0        Chiwome  Wolves  0.923705   
539 -1.260760  0.218561             0         Cundle  Wolves  1.589517   
540  2.346175  0.844375           111          Cunha  Wolves  5.504535   
541 -0.309997 -0.007019            17         Dawson  Wolves  0.096098   
542  0.294712 -0.207348            27        Doherty  Wolves  0.086855   
543 -0.615974  0.312144            20          Doyle  Wolves  0.379424   
547  0.217478  0.103549            43         Guedes  Wolves  0.047297   
549 -0.195898  0.039654            25       Hee Chan  Wolves  0.038376   
552  1.045735  0.130111  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['team_id'] = players_df['team'].map(reversed_team_mapping)
  self.variances_ = np.nanvar(X, axis=0)
  self.variances_ = np.nanmin(compare_arr, axis=0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['Score'] = (explained_variance_pc1 * pca_df['PC1^2']) + (explained_variance_pc2 * pca_df['PC2'])


here 1
hello           PC1       PC2  Total Points       web_name    team     PC1^2  \
203 -0.224036 -0.714811            12      Johnstone  Wolves  0.050192   
532  1.493274 -0.178256            56      Aït-Nouri  Wolves  2.229867   
534  0.047512  0.350779            34     Bellegarde  Wolves  0.002257   
535 -0.871307 -0.716078             0        Bentley  Wolves  0.759175   
538 -0.951502 -0.084649             0        Chiwome  Wolves  0.905356   
539 -1.253232  0.214357             0         Cundle  Wolves  1.570591   
540  2.351692  0.846492           111          Cunha  Wolves  5.530454   
541 -0.302595 -0.001793            17         Dawson  Wolves  0.091564   
542  0.303704 -0.209277            27        Doherty  Wolves  0.092236   
543 -0.607938  0.308211            20          Doyle  Wolves  0.369589   
547  0.226327  0.097822            43         Guedes  Wolves  0.051224   
549 -0.186701  0.032632            25       Hee Chan  Wolves  0.034857   
552  1.053403  0.136718  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['team_id'] = players_df['team'].map(reversed_team_mapping)
  self.variances_ = np.nanvar(X, axis=0)
  self.variances_ = np.nanmin(compare_arr, axis=0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['Score'] = (explained_variance_pc1 * pca_df['PC1^2']) + (explained_variance_pc2 * pca_df['PC2'])


here 1
hello           PC1       PC2  Total Points       web_name    team     PC1^2  \
203 -0.224036 -0.714811            12      Johnstone  Wolves  0.050192   
532  1.493274 -0.178257            56      Aït-Nouri  Wolves  2.229867   
534  0.047512  0.350780            34     Bellegarde  Wolves  0.002257   
535 -0.871307 -0.716078             0        Bentley  Wolves  0.759175   
538 -0.951502 -0.084648             0        Chiwome  Wolves  0.905356   
539 -1.253232  0.214358             0         Cundle  Wolves  1.570591   
540  2.351692  0.846490           111          Cunha  Wolves  5.530454   
541 -0.302595 -0.001792            17         Dawson  Wolves  0.091564   
542  0.303704 -0.209278            27        Doherty  Wolves  0.092236   
543 -0.607938  0.308211            20          Doyle  Wolves  0.369589   
547  0.226327  0.097821            43         Guedes  Wolves  0.051224   
549 -0.186701  0.032632            25       Hee Chan  Wolves  0.034857   
552  1.053403  0.136718  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['team_id'] = players_df['team'].map(reversed_team_mapping)
  self.variances_ = np.nanvar(X, axis=0)
  self.variances_ = np.nanmin(compare_arr, axis=0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['Score'] = (explained_variance_pc1 * pca_df['PC1^2']) + (explained_variance_pc2 * pca_df['PC2'])


here 1
hello           PC1       PC2  Total Points       web_name    team     PC1^2  \
203 -0.198767 -0.721831            12      Johnstone  Wolves  0.039508   
532  1.511963 -0.163442            56      Aït-Nouri  Wolves  2.286033   
534  0.069012  0.344830            34     Bellegarde  Wolves  0.004763   
535 -0.847415 -0.730360             0        Bentley  Wolves  0.718112   
538 -0.931897 -0.100515             0        Chiwome  Wolves  0.868432   
539 -1.234733  0.205088             0         Cundle  Wolves  1.524566   
540  2.363904  0.857522           111          Cunha  Wolves  5.588043   
541 -0.282353  0.004630            17         Dawson  Wolves  0.079723   
542  0.327621 -0.219023            27        Doherty  Wolves  0.107336   
543 -0.585598  0.292281            20          Doyle  Wolves  0.342925   
547  0.249301  0.081047            43         Guedes  Wolves  0.062151   
549 -0.164014  0.015026            25       Hee Chan  Wolves  0.026901   
552  1.073404  0.145884  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['team_id'] = players_df['team'].map(reversed_team_mapping)
  self.variances_ = np.nanvar(X, axis=0)
  self.variances_ = np.nanmin(compare_arr, axis=0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['Score'] = (explained_variance_pc1 * pca_df['PC1^2']) + (explained_variance_pc2 * pca_df['PC2'])


here 1
hello           PC1       PC2  Total Points       web_name    team     PC1^2  \
203 -0.198767 -0.721831            12      Johnstone  Wolves  0.039508   
532  1.511963 -0.163442            56      Aït-Nouri  Wolves  2.286033   
534  0.069012  0.344830            34     Bellegarde  Wolves  0.004763   
535 -0.847415 -0.730360             0        Bentley  Wolves  0.718112   
538 -0.931897 -0.100516             0        Chiwome  Wolves  0.868432   
539 -1.234733  0.205088             0         Cundle  Wolves  1.524566   
540  2.363904  0.857521           111          Cunha  Wolves  5.588043   
541 -0.282353  0.004631            17         Dawson  Wolves  0.079723   
542  0.327621 -0.219023            27        Doherty  Wolves  0.107336   
543 -0.585598  0.292281            20          Doyle  Wolves  0.342925   
547  0.249301  0.081047            43         Guedes  Wolves  0.062151   
549 -0.164014  0.015027            25       Hee Chan  Wolves  0.026901   
552  1.073404  0.145885  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['team_id'] = players_df['team'].map(reversed_team_mapping)
  self.variances_ = np.nanvar(X, axis=0)
  self.variances_ = np.nanmin(compare_arr, axis=0)


here 1


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['Score'] = (explained_variance_pc1 * pca_df['PC1^2']) + (explained_variance_pc2 * pca_df['PC2'])


hello           PC1       PC2  Total Points       web_name    team     PC1^2  \
203 -0.190200 -0.719505            12      Johnstone  Wolves  0.036176   
532  1.520264 -0.155671            56      Aït-Nouri  Wolves  2.311204   
534  0.077250  0.352901            34     Bellegarde  Wolves  0.005968   
535 -0.838465 -0.736750             0        Bentley  Wolves  0.703024   
538 -0.920843 -0.110458             0        Chiwome  Wolves  0.847952   
539 -1.225522  0.204328             0         Cundle  Wolves  1.501904   
540  2.372040  0.850739           111          Cunha  Wolves  5.626572   
541 -0.274030  0.016186            17         Dawson  Wolves  0.075093   
542  0.335944 -0.215899            27        Doherty  Wolves  0.112858   
543 -0.577316  0.295113            20          Doyle  Wolves  0.333293   
547  0.258028  0.078969            43         Guedes  Wolves  0.066578   
549 -0.154365  0.011860            25       Hee Chan  Wolves  0.023829   
552  1.082322  0.158069         

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['team_id'] = players_df['team'].map(reversed_team_mapping)
  self.variances_ = np.nanvar(X, axis=0)
  self.variances_ = np.nanmin(compare_arr, axis=0)


here 1


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['Score'] = (explained_variance_pc1 * pca_df['PC1^2']) + (explained_variance_pc2 * pca_df['PC2'])


hello           PC1       PC2  Total Points       web_name    team     PC1^2  \
203 -0.180562 -0.721524            12      Johnstone  Wolves  0.032603   
532  1.529166 -0.157669            56      Aït-Nouri  Wolves  2.338348   
534  0.086261  0.347121            34     Bellegarde  Wolves  0.007441   
535 -0.829505 -0.734396             0        Bentley  Wolves  0.688079   
538 -0.913854 -0.105080             0        Chiwome  Wolves  0.835130   
539 -1.216889  0.202325             0         Cundle  Wolves  1.480818   
540  2.378599  0.858797           111          Cunha  Wolves  5.657733   
541 -0.264470  0.007893            17         Dawson  Wolves  0.069945   
542  0.345091 -0.217384            27        Doherty  Wolves  0.119088   
543 -0.568484  0.291874            20          Doyle  Wolves  0.323174   
547  0.266495  0.080565            43         Guedes  Wolves  0.071020   
549 -0.145451  0.012944            25       Hee Chan  Wolves  0.021156   
552  1.091457  0.151733         

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_df['team_id'] = players_df['team'].map(reversed_team_mapping)


In [1219]:
final_results_df.head()

Unnamed: 0,gameweek,id,web_name,team,position,can_transact,can_select,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,dreamteam_count,ep_next,ep_this,event_points,first_name,form,in_dreamteam,news,news_added,now_cost,photo,points_per_game,removed,second_name,selected_by_percent,special,squad_number,status,team_code,total_points,transfers_in,transfers_in_event,transfers_out,transfers_out_event,value_form,value_season,region,team_join_date,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,expected_goals_per_90,expected_assists_per_90,expected_goal_involvements_per_90,expected_goals_conceded_per_90,goals_conceded_per_90,form_rank,form_rank_type,points_per_game_rank,points_per_game_rank_type,selected_rank,selected_rank_type,starts_per_90,clean_sheets_per_90,direct_freekicks_order,penalties_order,element_type,Score,Cluster,PC1,team_id,adjusted_fdr,average_fdr,adjusted_score,adjusted_fdr_avg,average_fdr_avg
0,1,328,M.Salah,Liverpool,midfielder,True,True,,,118748,0,0,11,-11,8,14.8,14.8,16,Mohamed,13.8,True,,,13.6,118748.jpg,11.2,False,Salah,69.2,False,,a,14,202,5801030,65813,2074115,2446,1.0,14.9,63.0,2017-07-01,1583,17,13,8,17,0,0,0,1,0,0,35,654,929.8,494.3,1142.0,256.5,18,14.8,4.12,18.92,16.4,0.84,0.23,1.07,0.93,0.97,1,1,1,1,1,1,1.02,0.45,,2.0,3,5.256952,0,3.164184,12,5.0,2.0,15.256952,5.0,2.0
1,1,182,Palmer,Chelsea,midfielder,True,True,100.0,100.0,244851,0,0,9,-9,6,7.7,7.7,2,Cole,7.2,True,,2024-11-12T14:30:07.656076Z,11.4,244851.jpg,7.5,False,Palmer,68.8,False,,a,8,143,6088143,50328,2630321,36087,0.6,12.5,241.0,2023-09-01,1658,12,7,4,22,0,0,0,3,0,0,23,609,721.6,711.4,596.0,203.1,19,10.41,4.93,15.34,24.66,0.57,0.27,0.84,1.34,1.19,4,3,2,2,2,2,1.03,0.22,1.0,1.0,3,3.408865,0,2.548028,6,3.0,4.0,9.408865,3.0,4.0
2,1,401,Isak,Newcastle,forward,True,True,100.0,100.0,219168,1,-1,7,-7,2,9.8,10.3,7,Alexander,10.3,True,,2024-11-30T22:00:07.827670Z,9.2,219168.jpg,6.6,False,Isak,48.7,False,,a,4,112,5344925,216841,5828901,14293,1.1,12.2,206.0,2022-08-26,1380,12,4,6,19,0,0,0,0,0,0,20,460,581.0,378.1,720.0,167.3,17,10.31,2.37,12.68,18.79,0.67,0.15,0.82,1.23,1.24,2,1,4,1,3,1,1.11,0.39,3.0,2.0,4,2.665491,0,2.238759,15,5.0,2.0,12.665491,5.0,2.0
3,1,541,Cunha,Wolves,forward,True,True,0.0,,430871,0,0,6,-6,3,0.0,5.2,1,Matheus,5.7,True,Suspended until 15 Jan,2024-12-30T09:30:07.234687Z,7.1,430871.jpg,5.8,False,Santos Carneiro Da Cunha,22.8,False,,s,39,111,3985070,61241,1801430,332146,0.8,15.6,30.0,2023-07-01,1560,10,4,3,39,0,0,0,2,0,0,25,527,604.8,498.6,484.0,158.8,18,4.06,3.46,7.52,27.6,0.23,0.2,0.43,1.59,2.25,15,3,7,2,16,5,1.04,0.17,3.0,3.0,4,2.662523,0,2.25347,20,2.0,5.0,6.662523,2.0,5.0
4,1,99,Mbeumo,Brentford,midfielder,True,True,100.0,100.0,446008,0,0,6,-6,3,5.3,4.3,0,Bryan,4.8,True,,2024-11-13T15:00:08.005055Z,7.6,446008.jpg,6.1,False,Mbeumo,28.6,False,,a,94,110,5864614,95328,3374871,50674,0.6,14.5,38.0,2019-08-05,1615,10,2,3,30,0,0,0,2,0,0,17,428,584.8,509.7,379.0,147.5,18,4.89,4.09,8.98,29.41,0.27,0.23,0.5,1.64,1.67,32,15,5,4,8,3,1.0,0.17,2.0,2.0,3,2.293019,0,2.096956,4,4.0,3.0,10.293019,4.0,3.0


In [1177]:
final_results_df.to_csv("static_fpl_scores.csv")

In [1178]:
final_results_df['adjusted_score'].describe()

count    2391.000000
mean        8.719622
std         2.164375
min         0.000000
25%         7.054088
50%         8.834526
75%        10.542398
max        15.627283
Name: adjusted_score, dtype: float64

In [1179]:
final_results_df.columns

Index(['gameweek', 'id', 'web_name', 'team', 'position', 'can_transact',
       'can_select', 'chance_of_playing_next_round',
       'chance_of_playing_this_round', 'code', 'cost_change_event',
       'cost_change_event_fall', 'cost_change_start', 'cost_change_start_fall',
       'dreamteam_count', 'ep_next', 'ep_this', 'event_points', 'first_name',
       'form', 'in_dreamteam', 'news', 'news_added', 'now_cost', 'photo',
       'points_per_game', 'removed', 'second_name', 'selected_by_percent',
       'special', 'squad_number', 'status', 'team_code', 'total_points',
       'transfers_in', 'transfers_in_event', 'transfers_out',
       'transfers_out_event', 'value_form', 'value_season', 'region',
       'team_join_date', 'minutes', 'goals_scored', 'assists', 'clean_sheets',
       'goals_conceded', 'own_goals', 'penalties_saved', 'penalties_missed',
       'yellow_cards', 'red_cards', 'saves', 'bonus', 'bps', 'influence',
       'creativity', 'threat', 'ict_index', 'starts', 'expected_

In [1180]:
wolves_web_names = final_results_df[final_results_df['team'] == 'Wolves']['element_type'].unique()
print(wolves_web_names)

[4 2 3]


In [1181]:
len(final_results_df)

2391

In [1225]:
# Group by 'gameweek' and 'team', calculate the average 'adjusted_score'
grouped_results_df = final_results_df.groupby(['gameweek', 'team'], as_index=False).agg(
    avg_adjusted_score=('adjusted_score', 'mean')
)
grouped_results_df.tail(20)

Unnamed: 0,gameweek,team,avg_adjusted_score
360,19,Arsenal,0.0
361,19,Aston Villa,8.962402
362,19,Bournemouth,8.929617
363,19,Brentford,0.0
364,19,Brighton,6.742145
365,19,Chelsea,11.373515
366,19,Crystal Palace,10.914909
367,19,Everton,9.039789
368,19,Fulham,9.137281
369,19,Ipswich,6.752342


In [1183]:
grouped_results_df.to_csv("grouped_fpl_historical_data.csv")

In [1184]:
grouped_results_df['gameweek'].unique()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19])

In [1185]:
# Filtering players_df to show all rows where the team is 'Wolves'
wolves_players_df = grouped_results_df[grouped_results_df['team'] == "Spurs"]
wolves_players_df.head()

Unnamed: 0,gameweek,team,avg_adjusted_score
17,1,Spurs,11.105935
37,2,Spurs,11.086411
57,3,Spurs,7.153797
77,4,Spurs,7.174587
97,5,Spurs,11.181034


In [1186]:
team_name_mapping = {
    'Man Utd': 'Manchester Utd',
    'Ipswich': 'Ipswich Town',
    'Arsenal': 'Arsenal',
    'Everton': 'Everton',
    'Newcastle': 'Newcastle Utd',
    "Nott'm Forest": 'Nottingham Forest',
    'West Ham': 'West Ham',
    'Brentford': 'Brentford',
    'Chelsea': 'Chelsea',
    'Leicester': 'Leicester City',
    'Brighton': 'Brighton',
    'Crystal Palace': 'Crystal Palace',
    'Fulham': 'Fulham',
    'Man City': 'Manchester City',
    'Southampton': 'Southampton',
    'Spurs': 'Tottenham',  # Add mapping for 'Spurs'
    'Aston Villa': 'Aston Villa',
    'Bournemouth': 'Bournemouth',
    'Wolves': 'Wolves',
    'Liverpool': 'Liverpool'
}