In [None]:
import warnings
# Silence every warning for the rest of the session (no category or module filter is given).
# NOTE(review): this also hides pandas chained-assignment warnings raised by later cells —
# consider narrowing the filter once those are resolved.
warnings.filterwarnings("ignore")


In [None]:
import pandas as pd
import sys
import matplotlib.pyplot as plt 
import numpy as np 
import os 
import pandas as pd
from concurrent.futures import ThreadPoolExecutor ,ProcessPoolExecutor,as_completed
import pandas as pd

# Show frames without row/column truncation when they are displayed.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

cwd = os.getcwd()
# Construct the full path to the 'FPL' directory (two levels above the working directory)
fpl_path = os.path.join(cwd, '..', '..', 'FPL')
# Add it to the module search path only once, so re-running this cell does not
# keep appending duplicate entries to sys.path.
if fpl_path not in sys.path:
    sys.path.append(fpl_path)

In [None]:
from fpl_api_collection import (
    get_bootstrap_data,
    get_current_gw,
    get_fixt_dfs,
    get_fixture_data,
    get_player_id_dict,
    get_current_season,
    get_player_data,
    remove_moved_players
)

In [None]:
# Fetch the bootstrap payload once and reuse it, so players, element types and
# teams all come from the same response.
bootstrap_data = get_bootstrap_data()

# Retrieve and prepare player data
ele_types_data = bootstrap_data['element_types']
ele_types_df = pd.DataFrame(ele_types_data)
ele_data = bootstrap_data['elements']
ele_df = pd.DataFrame(ele_data)
# Replace the element_type id with the matching 'singular_name_short' label
ele_df['element_type'] = ele_df['element_type'].map(ele_types_df.set_index('id')['singular_name_short'])
ele_df['logo_player'] = "https://resources.premierleague.com/premierleague/photos/players/250x250/p" + ele_df['code'].astype(str) + ".png"
ele_copy = ele_df.copy()

# Retrieve and prepare team data
teams_data = bootstrap_data['teams']
teams_df = pd.DataFrame(teams_data)
teams_df['logo_url'] = "https://resources.premierleague.com/premierleague/badges/70/t" + teams_df['code'].astype(str) + ".png"

# Map team IDs to names for fixture processing
team_name_mapping = pd.Series(teams_df.name.values, index=teams_df.id).to_dict()
ele_copy['team_name'] = ele_copy['team'].map(teams_df.set_index('id')['short_name'])
# full_name has the form "<first_name> <second_name> (<team short name>)"
ele_copy['full_name'] = ele_copy['first_name'].str.cat(ele_copy['second_name'].str.cat(ele_copy['team_name'].apply(lambda x: f" ({x})"), sep=''), sep=' ')

# Retrieve player dictionary and current season/gameweek
full_player_dict = get_player_id_dict('total_points', web_name=False)
crnt_season = get_current_season()
ct_gw = get_current_gw()

# Retrieve and process fixture data
fixture_data = get_fixture_data()
fixtures_df = pd.DataFrame(fixture_data).drop(columns='stats')
# Attach the logo URLs with an id -> URL lookup instead of merging on teams_df['id'].
# A merge would add a second 'id' column, renaming the fixture's own 'id' to
# 'id_x' and leaving 'id' holding a team id; the lookup keeps 'id' untouched.
team_logo_by_id = teams_df.set_index('id')['logo_url']
fixtures_df['team_h_logo'] = fixtures_df['team_h'].map(team_logo_by_id)
fixtures_df['team_a_logo'] = fixtures_df['team_a'].map(team_logo_by_id)
# Swap team ids for full team names (done after the logo lookup, which needs the ids)
fixtures_df['team_h'] = fixtures_df['team_h'].replace(team_name_mapping)
fixtures_df['team_a'] = fixtures_df['team_a'].replace(team_name_mapping)
fixtures_df = fixtures_df.drop(columns=['pulse_id'])

# Format fixture dates
timezone = 'Europe/London'
fixtures_df['datetime'] = pd.to_datetime(fixtures_df['kickoff_time'], utc=True)
# Convert to the local timezone once and derive the three display strings from it
kickoff_local = fixtures_df['datetime'].dt.tz_convert(timezone)
fixtures_df['local_time'] = kickoff_local.dt.strftime('%A %d %B %Y %H:%M')
fixtures_df['local_date'] = kickoff_local.dt.strftime('%d %A %B %Y')
fixtures_df['local_hour'] = kickoff_local.dt.strftime('%H:%M')

# Retrieve fixture difficulty rating data
team_fdr_df, team_fixt_df, team_ga_df, team_gf_df = get_fixt_dfs()

In [None]:
def convert_score_to_result(df):
    """Write a 'result' column holding the score as seen from the player's side.

    Rows where ``was_home`` is True get ``"<team_h_score>-<team_a_score>"``;
    rows where it is False get ``"<team_a_score>-<team_h_score>"``.  Scores are
    cast through nullable ``Int64`` before being turned into text, so a missing
    score is rendered as ``<NA>``.

    The frame is modified in place and nothing is returned.
    """
    home_goals = df['team_h_score'].astype('Int64').astype(str)
    away_goals = df['team_a_score'].astype('Int64').astype(str)

    played_at_home = df['was_home'] == True
    played_away = df['was_home'] == False

    df.loc[played_at_home, 'result'] = home_goals + '-' + away_goals
    df.loc[played_away, 'result'] = away_goals + '-' + home_goals
        
def convert_opponent_string(df):
    """Append venue markers to the 'vs' and 'Team_player' columns, in place.

    For rows where ``was_home`` is True the opponent in 'vs' gets ' (A)' and
    the player's own team in 'Team_player' gets ' (H)'; for rows where it is
    False the markers are swapped.  Rows matching neither are left unchanged.

    Returns the same (mutated) frame.
    """
    at_home = df['was_home'] == True
    on_the_road = df['was_home'] == False

    # (column, marker for home rows, marker for away rows)
    marker_plan = (
        ('vs', ' (A)', ' (H)'),
        ('Team_player', ' (H)', ' (A)'),
    )
    for column, home_marker, away_marker in marker_plan:
        df.loc[at_home, column] = df[column] + home_marker
        df.loc[on_the_road, column] = df[column] + away_marker
    return df

def collate_hist_df_from_name(player_name):
    """Build the match-by-match history table for one player.

    Parameters
    ----------
    player_name : str
        A value of ``full_player_dict`` that also appears in
        ``ele_copy['full_name']``.

    Returns
    -------
    pandas.DataFrame
        The player's ``history`` records with renamed columns (see ``rn_dict``),
        restricted to ``col_order`` plus 'Pos' and 'Team_player', sorted by
        'GW' in descending order.  'Price' is the raw 'value' divided by 10 and
        'vs' holds the opponent's short name.

    Raises
    ------
    IndexError
        If ``player_name`` is not found in ``ele_copy`` or ``full_player_dict``.
    """
    player_rows = ele_copy.loc[ele_copy['full_name'] == player_name]
    p_id = [k for k, v in full_player_dict.items() if v == player_name]
    if player_rows.empty or not p_id:
        # Same exception type the bare .iloc[0] / p_id[0] lookups would raise,
        # but with a message that names the offending player.
        raise IndexError(f"Unknown player name: {player_name!r}")
    position = player_rows['element_type'].iloc[0]
    Team = player_rows['team_name'].iloc[0]

    p_data = get_player_data(str(p_id[0]))
    p_df = pd.DataFrame(p_data['history'])
    convert_score_to_result(p_df)
    # Rows with both scores missing are shown as a single dash
    p_df.loc[p_df['result'] == '<NA>-<NA>', 'result'] = '-'
    rn_dict = {'round': 'GW', 'opponent_team': 'vs', 'total_points': 'Pts',
               'minutes': 'Mins', 'goals_scored': 'GS', 'assists': 'A',
               'clean_sheets': 'CS', 'goals_conceded': 'GC', 'own_goals': 'OG',
               'penalties_saved': 'Pen_Save', 'penalties_missed': 'Pen_Miss',
               'yellow_cards': 'YC', 'red_cards': 'RC', 'saves': 'S',
               'bonus': 'B', 'bps': 'BPS', 'influence': 'I', 'creativity': 'C',
               'threat': 'T', 'ict_index': 'ICT', 'value': 'Price',
               'selected': 'SB', 'transfers_in': 'Tran_In',
               'transfers_out': 'Tran_Out', 'expected_goals': 'xG',
               'expected_assists': 'xA', 'expected_goal_involvements': 'xGI',
               'expected_goals_conceded': 'xGC', 'result': 'Result'}
    col_order = ['GW','kickoff_time', 'vs', 'Result', 'Pts', 'Mins', 'GS', 'xG', 'A', 'xA',
                 'xGI', 'Pen_Miss', 'CS', 'GC', 'xGC', 'OG', 'Pen_Save', 'S',
                 'YC', 'RC', 'B', 'BPS', 'Price', 'I', 'C', 'T', 'ICT', 'SB',
                 'Tran_In', 'Tran_Out', 'was_home']
    # .copy() makes the selected columns an independent frame, so the column
    # assignments below are not made on a slice of the renamed frame.
    p_df = p_df.rename(columns=rn_dict)[col_order].copy()

    p_df['Price'] = p_df['Price']/10
    # map opponent team ids to short names
    p_df['vs'] = p_df['vs'].map(teams_df.set_index('id')['short_name'])
    p_df['Pos'] = position
    p_df['Team_player'] = Team
    return p_df.sort_values('GW', ascending=False)

In [None]:
def collate_all_players_parallel(full_player_dict, max_workers=None):
    """Collect the history tables of many players concurrently.

    Parameters
    ----------
    full_player_dict : dict
        Mapping whose values are player names accepted by
        ``collate_hist_df_from_name``.
    max_workers : int, optional
        Thread-pool size.  Defaults to twice the CPU count (falling back to 2
        when the CPU count cannot be determined).

    Returns
    -------
    pandas.DataFrame
        All successfully built per-player tables stacked with a fresh index and
        a 'Player' column holding the player's name.  An empty DataFrame is
        returned when there is nothing to stack.

    Notes
    -----
    A failure for one player is printed and that player is skipped; it does not
    abort the whole run.
    """
    if max_workers is None:
        # os.cpu_count() may return None; treat that as a single CPU
        max_workers = (os.cpu_count() or 1) * 2

    # Named so it does not shadow the imported get_player_data function
    def _fetch_history(player_name):
        try:
            player_df = collate_hist_df_from_name(player_name)
            player_df['Player'] = player_name  # Add player name column
            return player_df
        except Exception as e:
            print(f"Error processing {player_name}: {e}")
            return pd.DataFrame()  # Empty frame marks a failed player

    results = []
    # The with-block waits for all submitted tasks and releases the threads
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(_fetch_history, player_name): player_name
                   for player_name in full_player_dict.values()}

        for future in as_completed(futures):
            player_name = futures[future]
            try:
                result_df = future.result()
            except Exception as e:
                print(f"Error retrieving result for {player_name}: {e}")
                continue
            # Skip the empty placeholders returned for failed players
            if not result_df.empty:
                results.append(result_df)

    # pd.concat raises on an empty list, so handle "nothing collected" explicitly
    if not results:
        return pd.DataFrame()
    return pd.concat(results, axis=0, ignore_index=True)

In [None]:
# Build one long history table covering every player in full_player_dict.
# NOTE(review): presumably one remote request per player — this cell may be slow; confirm and consider caching the result.
all_players_data = collate_all_players_parallel(full_player_dict)

In [None]:
# Display the collated history table.
# NOTE(review): display.max_rows is set to None in the setup cell, so this renders every row — consider all_players_data.head().
all_players_data

In [None]:
# Team strength ratings to attach, keyed by the team's short name.
strength_lookup = teams_df[['short_name',
                            'strength_overall_home',
                            'strength_overall_away',
                            'strength_attack_home',
                            'strength_attack_away',
                            'strength_defence_home',
                            'strength_defence_away']]

# Ratings of the player's own team (joined on 'Team_player').
merged_home = all_players_data.merge(
    strength_lookup,
    how='left',
    left_on='Team_player',
    right_on='short_name',
)

# Ratings of the opposing team (joined on 'vs'); columns that clash with the
# first join receive the '_opponent' suffix.
merged_opponent = merged_home.merge(
    strength_lookup,
    how='left',
    left_on='vs',
    right_on='short_name',
    suffixes=('', '_opponent'),
)

# The two join-key columns are not needed once both lookups are attached.
merged_opponent = merged_opponent.drop(columns=['short_name', 'short_name_opponent'])
# Append the (H)/(A) venue markers to 'vs' and 'Team_player'.
merged_opponent = convert_opponent_string(merged_opponent)


In [None]:
# Display the history table with both teams' strength ratings attached.
# NOTE(review): display.max_rows is set to None in the setup cell, so this renders every row — consider merged_opponent.head().
merged_opponent

In [None]:
# Fixture and difficulty tables; the column slice below is by gameweek number.
team_fdr_df, team_fixt_df, team_ga_df, team_gf_df = get_fixt_dfs()

ct_gw = get_current_gw()

# Window: the current gameweek and the two after it.  .copy() detaches the
# window from team_fixt_df, so the in-place padding done by
# get_home_away_str_dict() acts on an independent frame rather than a slice.
new_fixt_df = team_fixt_df.loc[:, ct_gw:(ct_gw+2)].copy()
new_fixt_cols = ['GW' + str(col) for col in new_fixt_df.columns.tolist()]
new_fixt_df.columns = new_fixt_cols

# Same window of the FDR table, labelled with the same 'GW<n>' names right away
# so both frames share column labels without relying on
# get_home_away_str_dict() having been called first.
new_fdr_df = team_fdr_df.loc[:, ct_gw:(ct_gw+2)].copy()
new_fdr_df.columns = new_fixt_cols

def get_home_away_str_dict():
    """Group the windowed fixture strings by their FDR value.

    Reads the module-level ``new_fdr_df``, ``new_fixt_df`` and ``new_fixt_cols``.

    Side effects: relabels the columns of ``new_fdr_df`` with ``new_fixt_cols``
    and, for every column whose longest string exceeds 7 characters, pads the
    strings of length <= 7 in ``new_fixt_df`` on both sides with spaces.

    Returns a dict mapping each FDR value to a de-duplicated list of fixture
    strings (list order is not defined).  Keys 1 to 5 are always present,
    with an empty list when no fixture carries that value.
    """
    new_fdr_df.columns = new_fixt_cols

    grouped = {}
    for gw_col in new_fdr_df.columns:
        ratings = list(new_fdr_df[gw_col])

        lengths = new_fixt_df[gw_col].str.len()
        longest = lengths.max()
        if longest > 7:
            new_fixt_df.loc[lengths <= 7, gw_col] = new_fixt_df[gw_col].str.pad(
                width=longest+9, side='both', fillchar=' ')

        # Read the strings after the optional padding has been applied
        for rating, fixture in zip(ratings, list(new_fixt_df[gw_col])):
            grouped.setdefault(rating, []).append(fixture)

    # Drop repeated strings within each rating
    merged_dict = {rating: list(set(fixtures)) for rating, fixtures in grouped.items()}
    for rating in range(1, 6):
        merged_dict.setdefault(rating, [])
    return merged_dict
	
	

In [None]:
# Group the windowed fixture strings by FDR value.  The call also relabels
# new_fdr_df's columns with new_fixt_cols as a side effect of the function.
# NOTE(review): `sui` is not read again in the visible notebook — confirm it is still needed.
sui=get_home_away_str_dict()

In [None]:
# Re-fetch the fixture and difficulty tables so this cell starts from the wide
# per-gameweek frames.
team_fdr_df, team_fixt_df, team_ga_df, team_gf_df = get_fixt_dfs()

ct_gw = get_current_gw()

# Window: the current gameweek and the two after it, relabelled 'GW<n>'
new_fixt_df = team_fixt_df.loc[:, ct_gw:(ct_gw+2)].copy()
new_fixt_cols = ['GW' + str(col) for col in new_fixt_df.columns.tolist()]
new_fixt_df.columns = new_fixt_cols

# Rebuild the matching FDR window here as well.  create_team_fdr_dataframe()
# indexes new_fixt_df with new_fdr_df's column labels, so both frames must carry
# the same 'GW<n>' names regardless of which earlier cells have been run.
new_fdr_df = team_fdr_df.loc[:, ct_gw:(ct_gw+2)].copy()
new_fdr_df.columns = new_fixt_cols

def create_team_fdr_dataframe():
    """Flatten the windowed fixture and FDR tables into one long table.

    Reads the module-level ``new_fdr_df`` and ``new_fixt_df``, which must share
    column labels, and pairs each fixture string with the FDR value found at
    the same row and column.

    Returns
    -------
    pandas.DataFrame
        Columns 'team' (the fixture string with surrounding whitespace removed)
        and 'fdr'.  Pairs whose FDR is missing or not positive, or whose
        fixture cell is not a string, are skipped.  Both columns are present
        even when no pair qualifies.
    """
    team_fdr_list = []

    for col in new_fdr_df.columns:
        fdr_values = new_fdr_df[col].values
        teams = new_fixt_df[col].values

        for team, fdr in zip(teams, fdr_values):
            # Skip missing / non-positive ratings
            if not (pd.notna(fdr) and fdr > 0):
                continue
            # A non-string cell (e.g. NaN for a blank fixture) has nothing to strip
            if not isinstance(team, str):
                continue
            team_fdr_list.append({'team': team.strip(), 'fdr': fdr})

    # Explicit columns keep 'team'/'fdr' available even for an empty result
    return pd.DataFrame(team_fdr_list, columns=['team', 'fdr'])


# Rebind team_fdr_df from the wide table returned by get_fixt_dfs() to this long
# (team, fdr) form; later cells read team_fdr_df['team'] and team_fdr_df['fdr'].
team_fdr_df = create_team_fdr_dataframe()


In [None]:
# Lookup from fixture string to FDR value; if a string occurs more than once,
# the rating of its last occurrence is the one kept.
team_fdr_map = {team: fdr for team, fdr in zip(team_fdr_df['team'], team_fdr_df['fdr'])}

# Look up 'Team_player' and 'vs' in that map; strings without an entry give NaN.
for _fdr_col, _key_col in (('Team_fdr', 'Team_player'), ('opponent_fdr', 'vs')):
    merged_opponent[_fdr_col] = merged_opponent[_key_col].map(team_fdr_map)

In [None]:
columns_to_convert = [
    # per-match player stats
    'GW', 'Pts', 'Mins', 'GS', 'xG', 'A', 'xA', 'xGI', 'Pen_Miss',
    'CS', 'GC', 'xGC', 'OG', 'Pen_Save', 'S', 'YC', 'RC', 'B', 'BPS',
    'Price', 'I', 'C', 'T', 'ICT', 'SB', 'Tran_In', 'Tran_Out',
    # strength ratings of the player's team
    'strength_overall_home', 'strength_overall_away',
    'strength_attack_home', 'strength_attack_away',
    'strength_defence_home', 'strength_defence_away',
    # strength ratings of the opponent
    'strength_overall_home_opponent', 'strength_overall_away_opponent',
    'strength_attack_home_opponent', 'strength_attack_away_opponent',
    'strength_defence_home_opponent', 'strength_defence_away_opponent',
    # fixture difficulty
    'Team_fdr', 'opponent_fdr',
]

# Parse every listed column as a number; values that cannot be parsed become NaN.
merged_opponent[columns_to_convert] = merged_opponent[columns_to_convert].apply(
    pd.to_numeric, errors='coerce')


In [None]:
# Tag every history row with a hard-coded season label.
# NOTE(review): 2425 presumably denotes the 2024/25 season — confirm, and consider a single named constant since the literal is repeated in later cells.
merged_opponent['season']=2425

#### Fixture

In [None]:
# Fixtures of the current gameweek, without the two logo URL columns.
# Filter and drop in one expression: the non-inplace drop returns a new frame,
# so nothing is mutated in place on a filtered slice of fixtures_df.
next_fixture_gw = (
    fixtures_df.loc[fixtures_df['event'] == ct_gw]
    .drop(columns=['team_h_logo', 'team_a_logo'])
)

In [None]:
# Name -> short-name lookup shared by both joins
team_short_names = teams_df[['short_name', 'name']]

# Away side: join on the away team's full name; the short name arrives already
# labelled 'team_a_short_name'.
new_fix_gw_a = next_fixture_gw.merge(
    team_short_names.rename(columns={'short_name': 'team_a_short_name'}),
    how='left',
    left_on='team_a',
    right_on='name',
)

# Home side: same join on the home team's full name.  Both inputs carry a 'name'
# column at this point, so the result holds 'name_x' and 'name_y'.
new_fix_gw = new_fix_gw_a.merge(
    team_short_names.rename(columns={'short_name': 'team_h_short_name'}),
    how='left',
    left_on='team_h',
    right_on='name',
)

# Remove the leftover join keys, then append the venue markers.
new_fix_gw = new_fix_gw.drop(columns=['name_x', 'name_y'], errors='ignore')
new_fix_gw['team_h_short_name'] = new_fix_gw['team_h_short_name'] + ' (H)'
new_fix_gw['team_a_short_name'] = new_fix_gw['team_a_short_name'] + ' (A)'


In [None]:
# Every suffixed team label ('XXX (H)' / 'XXX (A)') appearing in the next gameweek
teams_next_gw = pd.concat([new_fix_gw['team_a_short_name'], new_fix_gw['team_h_short_name']]).unique()

# Work on a copy so merged_opponent keeps its 'Result' column; that makes this
# cell safe to re-run instead of failing on a missing column the second time.
filtered_players = merged_opponent.copy()

# 'Result' is "<own score>-<opponent score>".  reindex guarantees both parts
# exist as columns even if no row contains a dash.
score_parts = (
    filtered_players['Result']
    .str.split('-', n=1, expand=True)
    .reindex(columns=[0, 1])
)

# Parse the two parts as numbers.  Rows without a score (the '-' placeholder
# splits into empty strings) become NaN instead of raising.
filtered_players['team_player_score'] = pd.to_numeric(score_parts[0], errors='coerce')
filtered_players['vs_score'] = pd.to_numeric(score_parts[1], errors='coerce')
filtered_players = filtered_players.drop(columns=['Result'])

In [None]:
# Display the current-gameweek fixtures with the suffixed short-name columns.
new_fix_gw

In [None]:
# Keep the four columns used downstream and rename three of them
# ('kickoff_time' keeps its name).
_fixture_renames = {
    'event': 'GW',
    'team_h_short_name': 'Team_home',
    'team_a_short_name': 'Team_away',
}
new_fix_gw_test = (
    new_fix_gw
    .loc[:, ['event', 'team_h_short_name', 'team_a_short_name', 'kickoff_time']]
    .rename(columns=_fixture_renames)
)

In [None]:
# Display the trimmed fixture table (GW, Team_home, Team_away, kickoff_time).
new_fix_gw_test

### Model

In [None]:
# Path of the stored player-history CSV, two levels above the working directory
# (the file name suggests the 2023/24 season — TODO confirm).
_history_parts = ('..', '..', 'data', 'history', 'clean_player_2324.csv')
history_path = os.path.join(cwd, *_history_parts)

# The first CSV column is used as the index.
player_history = pd.read_csv(history_path, index_col=0)

In [None]:
# Stack the current rows on top of the stored history.  ignore_index=True
# already gives the result a fresh 0..n-1 index, so no separate reset is needed.
concatenated_df = pd.concat(
    (filtered_players, player_history),
    axis=0,
    ignore_index=True,
)



In [None]:
# Tag the fixture rows with the same hard-coded season label used for the history rows.
# NOTE(review): 2425 is repeated as a literal in several cells — consider a named constant.
new_fix_gw_test['season']=2425

In [None]:
# Working aliases for the modelling cells.  These are the same objects, not
# copies: later in-place changes to df_player also change concatenated_df.
df_player = concatenated_df
df_fixture = new_fix_gw_test

# Season-2425 history rows whose suffixed team label appears in the next gameweek
filtered_players_fixture = df_player[
    (df_player['Team_player'].isin(teams_next_gw)) &
    (df_player['season'] == 2425)
]

# Player attributes carried onto each fixture row
_player_cols = ['Team_player', 'Player', 'was_home', 'Pos', 'Price']

# Attach players to each fixture twice: matched on the home label and on the
# away label, then stacked.
df_fixture_home = pd.merge(
    df_fixture,
    filtered_players_fixture[_player_cols],
    left_on='Team_home',
    right_on='Team_player',
    how='left'
)

df_fixture_away = pd.merge(
    df_fixture,
    filtered_players_fixture[_player_cols],
    left_on='Team_away',
    right_on='Team_player',
    how='left'
)

df_fixture_full = pd.concat([df_fixture_home, df_fixture_away], axis=0, ignore_index=True)

# Per-player mean of each stat over the filtered rows.  The means are used as
# they are; no per-90-minutes normalisation is applied.
_stat_cols = ['Pts', 'Mins', 'GS', 'xG', 'A', 'xA', 'xGI', 'Pen_Miss', 'CS', 'GC',
              'xGC', 'OG', 'Pen_Save', 'S', 'YC', 'RC', 'B', 'BPS', 'I', 'C', 'T',
              'ICT', 'SB', 'Tran_In', 'Tran_Out']
total_stats = filtered_players_fixture.groupby('Player')[_stat_cols].mean().reset_index()

# Add the averaged stats to every fixture/player row
df_pred = pd.merge(df_fixture_full, total_stats, on='Player', how='left')

# Opponent label: the away side when the player's team is the home side, the
# home side when it is the away side, missing otherwise.
_is_home_side = df_pred['Team_player'] == df_pred['Team_home']
_is_away_side = df_pred['Team_player'] == df_pred['Team_away']
df_pred['vs'] = df_pred['Team_away'].where(
    _is_home_side,
    df_pred['Team_home'].where(_is_away_side),
)

df_pred = df_pred.drop(columns=['Team_home', 'Team_away'])

# Labels without the " (H)" / " (A)" suffix, used only as keys for the strength joins
df_pred['vs_temp'] = df_pred['vs'].str.replace(r'\s?\(.*\)', '', regex=True)
df_pred['Team_player_temp'] = df_pred['Team_player'].str.replace(r'\s?\(.*\)', '', regex=True)

_strength_lookup = teams_df[['short_name',
                             'strength_overall_home',
                             'strength_overall_away',
                             'strength_attack_home',
                             'strength_attack_away',
                             'strength_defence_home',
                             'strength_defence_away']]

# Strength ratings of the player's own team
pred_home = pd.merge(df_pred, _strength_lookup,
                     left_on='Team_player_temp',
                     right_on='short_name',
                     how='left')

# Strength ratings of the opponent; clashing columns get the '_opponent' suffix
pred_opponent = pd.merge(pred_home, _strength_lookup,
                         left_on='vs_temp',
                         right_on='short_name',
                         how='left',
                         suffixes=('', '_opponent'))

# Drop the join keys and the temporary un-suffixed labels
df_next_fixt = pred_opponent.drop(columns=['short_name', 'short_name_opponent','vs_temp','Team_player_temp'])

_fdr_lookup = team_fdr_df[['team', 'fdr']]

# FDR looked up by the player's own suffixed label.  drop_duplicates keeps the
# first row for each player.
merged_fdr_home = pd.merge(
    df_next_fixt,
    _fdr_lookup,
    left_on='Team_player',
    right_on='team',
    how='left'
).rename(columns={'fdr': 'Team_fdr'}).drop_duplicates(subset=['Player'])

# FDR looked up by the opponent's label ('vs').  This matches how opponent_fdr
# is derived for the history rows (a lookup keyed on 'vs'); keying this join on
# 'Team_player' would only reproduce Team_fdr.
merged_fdr_away = pd.merge(
    merged_fdr_home,
    _fdr_lookup,
    left_on='vs',
    right_on='team',
    how='left'
).rename(columns={'fdr': 'opponent_fdr'}).drop_duplicates(subset=['Player'])

# Both FDR joins bring a 'team' key column (suffixed _x/_y); remove them
df_next_fixt_gw = merged_fdr_away.drop(columns=['team_x', 'team_y'], errors='ignore')


In [None]:
# Columns that must be float in both the prediction frame and the training frame
columns_to_convert = [
    # per-match player stats
    'GW', 'Pts', 'Mins', 'GS', 'xG', 'A', 'xA', 'xGI', 'Pen_Miss', 'CS', 'GC',
    'xGC', 'OG', 'Pen_Save', 'S', 'YC', 'RC', 'B', 'BPS', 'Price', 'I', 'C',
    'T', 'ICT', 'SB', 'Tran_In', 'Tran_Out',
    # strength ratings of the player's team
    'strength_overall_home', 'strength_overall_away',
    'strength_attack_home', 'strength_attack_away',
    'strength_defence_home', 'strength_defence_away',
    # strength ratings of the opponent
    'strength_overall_home_opponent', 'strength_overall_away_opponent',
    'strength_attack_home_opponent', 'strength_attack_away_opponent',
    'strength_defence_home_opponent', 'strength_defence_away_opponent',
    # fixture difficulty
    'Team_fdr', 'opponent_fdr',
]

# Cast in place, prediction frame first, then the training frame
for _frame in (df_next_fixt_gw, df_player):
    _frame[columns_to_convert] = _frame[columns_to_convert].astype(float)


In [None]:
# List the columns available in the training frame.
df_player.columns

In [None]:
# Multipliers per position label and stat column.
# NOTE(review): neither pos_weights nor home_away_weights is read anywhere in the
# visible notebook — confirm whether they are still meant to be applied.
pos_weights = {
    'GKP': {
        'CS': 1.5,    # Clean sheets more important for goalkeepers
        'Pen_Save': 1.3,
        'S': 1.2      # 'S' is the renamed 'saves' column
    },
    'DEF': {
        'CS': 1.4,
        'B': 1.3,     # Bonus points
        'xGC': 1.2    # Expected goals conceded
    },
    'MID': {
        'GS': 1.3,    # Goals scored
        'A': 1.4,     # Assists
        'xA': 1.2
    },
    'FWD': {
        'GS': 1.5,
        'xG': 1.3,
        'S': 1.1      # 'S' is the renamed 'saves' column, not shots — NOTE(review): confirm this weight is intended for forwards
    }
}
# Multipliers keyed by the was_home flag (True = home, False = away)
home_away_weights = {
    True: {  # Home game weights
        'GS': 1.2,         # Goals scored
        'xG': 1.1,         # Expected goals
        'CS': 1.3,         # Clean sheets
        'strength_attack_home': 1.1,
        'strength_defence_home': 1.2
    },
    False: {  # Away game weights
        'GS': 1.1,         # Goals scored
        'xG': 1.0,
        'CS': 1.1,
        'strength_attack_away': 1.0,
        'strength_defence_away': 1.1
    }
}

In [None]:
# Make a copy of X to apply weights
# Independent copy of the training frame from which the feature matrix is taken.
# NOTE(review): despite the name, no weights are applied to this copy in the visible notebook.
X_weighted = df_player.copy()

In [None]:
# Model inputs, in the column order the model is trained and queried with
features = [
    # per-match player stats
    'Mins', 'GS', 'xG', 'A', 'xA', 'xGI', 'Pen_Miss',
    'CS', 'GC', 'xGC', 'OG', 'Pen_Save', 'S', 'YC', 'RC', 'B', 'BPS',
    'Price', 'I', 'C', 'T', 'ICT', 'SB', 'Tran_In', 'Tran_Out',
    # venue flag
    'was_home',
    # strength ratings of the player's team
    'strength_overall_home', 'strength_overall_away',
    'strength_attack_home', 'strength_attack_away',
    'strength_defence_home', 'strength_defence_away',
    # strength ratings of the opponent
    'strength_overall_home_opponent', 'strength_overall_away_opponent',
    'strength_attack_home_opponent', 'strength_attack_away_opponent',
    'strength_defence_home_opponent', 'strength_defence_away_opponent',
    # fixture difficulty and season label
    'Team_fdr', 'opponent_fdr', 'season',
]

# Feature matrix and target ('Pts')
X = X_weighted.loc[:, features]
y = df_player.loc[:, 'Pts']

In [None]:
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [None]:
# Hold out 20% of the rows for evaluation (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Hyper-parameters of the gradient-boosted regressor, collected in one place
xgb_params = dict(
    objective='reg:squarederror',
    n_estimators=500,
    learning_rate=0.1,
    max_depth=3,
    min_child_weight=3,
    subsample=0.8,
    colsample_bytree=0.8,
    gamma=0.1,
    alpha=0,
    random_state=42,
)
xgb_model = XGBRegressor(**xgb_params)

# Fit on the training split; the held-out split is passed as the evaluation set
# and per-round logging is switched off.
xgb_model.fit(
    X_train,
    y_train,
    eval_set=[(X_test, y_test)],
    verbose=False,
)

# Score the held-out split
y_pred = xgb_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

In [None]:
# NOTE(review): matplotlib is already imported in the setup cell; imports are best kept together at the top.
import matplotlib.pyplot as plt
import xgboost as xgb

# Plot the fitted model's feature importances using xgboost's built-in helper.
xgb.plot_importance(xgb_model)
plt.show()

In [None]:
# Raw importance scores from the fitted booster ('weight' type; 'gain' or
# 'cover' are alternative values)
importance = xgb_model.get_booster().get_score(importance_type='weight')

# Two-column table, highest score first
importance_df = (
    pd.DataFrame(list(importance.items()), columns=['Feature', 'Importance'])
    .sort_values(by='Importance', ascending=False)
)

print(importance_df)

In [None]:
# Alias (not a copy) of the next-gameweek feature frame; changes made through
# `ssuiio` also appear in df_next_fixt_gw.
# NOTE(review): the name is not descriptive — consider renaming across the cells that use it.
ssuiio=df_next_fixt_gw

In [None]:
# Feature matrix for the upcoming fixtures, using the same column list and order as training.
XX = ssuiio[features]

In [None]:
# Model predictions for the upcoming-fixture rows.
# NOTE(review): the name is not descriptive — consider renaming together with the cell that reads it.
azdazdazd=xgb_model.predict(XX)

In [None]:
# Store the predictions as a new column; because `ssuiio` is an alias, the
# column is added to df_next_fixt_gw as well.
ssuiio['prediction']=azdazdazd

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Order rows by position, and within each position by prediction, highest first
top_players_per_pos = ssuiio.sort_values(by=['Pos', 'prediction'], ascending=[True, False])

# The first five rows of each position group are its five highest predictions
top_5_players = top_players_per_pos.groupby('Pos').head(5)

# One horizontal bar per player, coloured by position
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(data=top_5_players, x='prediction', y='Player', hue='Pos', dodge=False, ax=ax)

# Title, axis labels and legend
ax.set_title('Top 5 Players per Position Based on Prediction', fontsize=16)
ax.set_xlabel('Prediction Value', fontsize=14)
ax.set_ylabel('Player', fontsize=14)
ax.legend(title='Position')

# Render the figure
fig.tight_layout()
plt.show()