In [13]:
import os

import pandas as pd
import numpy as np
from math import pi

from bokeh.io import show, output_notebook, reset_output
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, HoverTool, Panel
from bokeh.models.ranges import FactorRange
from bokeh.models.widgets import Tabs, Select, RadioButtonGroup, Paragraph, Div, CheckboxButtonGroup
from bokeh.transform import dodge
from bokeh.core.properties import value
from bokeh.models import Legend
from bokeh.layouts import column, row, widgetbox
from bokeh.palettes import Set2, RdYlBu, Viridis, Category20_20, Spectral4
from bokeh.transform import cumsum

# Configure bokeh so that subsequent show() calls render inside the notebook.
output_notebook()
# reset_output()

### Data Loading and Cleaning

In [14]:
pd.options.display.max_columns = None

# Folder holding the scraped CSV files. Defaults to the original local path;
# set the FOOTBALL_STATS_DATA_DIR environment variable to read from elsewhere.
DATA_DIR = os.environ.get(
    'FOOTBALL_STATS_DATA_DIR',
    'C:\\PyCharm\\PyCharm Projects\\football_stats\\data_scraping\\data',
)

player_stats_df = pd.read_csv(os.path.join(DATA_DIR, 'players_stats_by_gw.csv'))
team_stats_df = pd.read_csv(os.path.join(DATA_DIR, 'teams_stats_by_gw.csv'))
player_info_df = pd.read_csv(os.path.join(DATA_DIR, 'players_info.csv'))
results_df = pd.read_csv(os.path.join(DATA_DIR, 'matches_results.csv'))

# Keep only result rows without any missing value.
results_df = results_df.dropna()


pl_teams = sorted(team_stats_df['Team'].unique())

# Remove rows of players whose team is not in the above list.

# Players (ids and row labels) to remove, found with one vectorised mask
# instead of a row-by-row loop.
outside_league = ~player_info_df['Team'].isin(pl_teams)
ids_to_remove = player_info_df.loc[outside_league, 'pid'].tolist()
idx_to_remove_from_info_df = player_info_df.index[outside_league].tolist()

idx_to_remove_from_stats_df = player_stats_df[player_stats_df['pid'].isin(ids_to_remove)].index

# reset_index() deliberately keeps the old labels as an 'index' column:
# the merge further down drops the resulting 'index_x' column by name.
player_info_df = player_info_df.drop(index=idx_to_remove_from_info_df).reset_index()
player_stats_df = player_stats_df.drop(index=idx_to_remove_from_stats_df).reset_index()

#### Add 'Match result' and 'Opponent' columns to teams df

In [15]:
def get_gw_match_result(team_row):
    """Classify a team's match in one gameweek as won, drawn or lost.

    The match is the first row of the module-level ``results_df`` that
    contains ``team_row['Team']`` in any column and whose 'Gameweek' equals
    ``team_row['Gameweek']``.

    Args:
        team_row: a row (anything indexable by 'Team' and 'Gameweek').

    Returns:
        'w' when the match's 'Winner' is the team, 'd' when it is the
        string 'Draw', and 'l' otherwise.

    Raises:
        IndexError: if no row of ``results_df`` matches.
    """
    team = team_row['Team']
    mentions_team = results_df.isin([team]).any(axis=1)
    in_gameweek = results_df['Gameweek'] == team_row['Gameweek']
    winner = results_df[mentions_team & in_gameweek].iloc[0]['Winner']

    if winner == team:
        return 'w'
    return 'd' if winner == 'Draw' else 'l'
    

def get_opponent(team_row):
    """Return the team that ``team_row['Team']`` faced in its gameweek.

    The match is the first row of the module-level ``results_df`` that
    contains the team in any column and whose 'Gameweek' equals
    ``team_row['Gameweek']``.

    Args:
        team_row: a row (anything indexable by 'Team' and 'Gameweek').

    Returns:
        The match's 'Away team' when the team is its 'Home team',
        otherwise the match's 'Home team'.

    Raises:
        IndexError: if no row of ``results_df`` matches.
    """
    team = team_row['Team']
    in_gameweek = results_df['Gameweek'] == team_row['Gameweek']
    mentions_team = results_df.isin([team]).any(axis=1)
    match = results_df[mentions_team & in_gameweek].iloc[0]

    played_at_home = match['Home team'] == team
    return match['Away team'] if played_at_home else match['Home team']



# Derive one new column per row-wise helper, in this order:
# first the w/d/l outcome, then the opposing team.
for new_column, row_func in (('Match result', get_gw_match_result),
                             ('Opponent', get_opponent)):
    team_stats_df[new_column] = team_stats_df.apply(row_func, axis=1)

#### Join players df and add columns

In [16]:
# Attach each player's info to the per-gameweek stats (inner join on 'pid'),
# discard the leftover 'index_x' column, then add per-row columns:
# 'Opponent' (team faced) and 'Result' (w/d/l).
joined_player_df = (
    pd.merge(player_stats_df, player_info_df, on='pid', how='inner')
    .drop(columns=['index_x'])
    .assign(
        Opponent=lambda df: df.apply(get_opponent, axis=1),
        Result=lambda df: df.apply(get_gw_match_result, axis=1),
    )
)

## Visualization

In [17]:
# Plot basic stats by teams and matches scores.

def basic_teams_stats_tab(teams_stats_df):
    """Build the 'Basic Teams Stats' tab.

    The tab contains a stat selector, an aggregation toggle
    ('Average per Match' / 'Total') and two bar charts: the chosen stat per
    team, and the same stat broken down by match result.

    Args:
        teams_stats_df: team stats frame. It must have a 'Team' column and a
            'Match result' column whose values include 'w', 'd' and 'l'.
            Every column except the first and the last three is offered as
            a selectable stat; the initial selection is 'Goal'.

    Returns:
        A bokeh ``Panel`` titled 'Basic Teams Stats'.
    """

    def create_data_source(comparison_stat, aggfunc):
        """Return a table of `comparison_stat` per team and match result.

        Rows are teams, columns are the 'Match result' values plus a
        'Total' column holding the stat aggregated over all of the team's
        rows. Rows are sorted by 'Total', largest first.

        Args:
            comparison_stat: name of the column to aggregate.
            aggfunc: aggregation name, 'mean' or 'sum'.
        """
        by_result = teams_stats_df.pivot_table(
            index='Team',
            columns='Match result',
            values=comparison_stat,
            aggfunc=aggfunc
        )

        # Aggregate only the requested column, once for all teams, and always
        # from the frame passed to the tab (not a module-level frame).
        overall = teams_stats_df.groupby(by='Team')[comparison_stat].agg(aggfunc)
        by_result['Total'] = overall.reindex(by_result.index)

        return by_result.sort_values(by='Total', ascending=False)

    def plot_team_stat(comparison_stat, agg_func):
        """Return the two bar charts for `comparison_stat`.

        Args:
            comparison_stat: name of the column to plot.
            agg_func: index of the active aggregation button
                (0 -> 'mean', 1 -> 'sum').

        Returns:
            (overall figure, breakdown-by-match-result figure).
        """
        # Position in this tuple follows the order of the radio button labels.
        map_agg_func = ('mean', 'sum')
        data = create_data_source(comparison_stat, map_agg_func[agg_func])
        source = ColumnDataSource(data=data)
        # Team order of the x axis follows the sort order of the table.
        teams = list(source.data['Team'])

        # Plot the overall stat per team

        p_1 = figure(x_range=FactorRange(factors=teams), plot_height=600,
                     plot_width=850)

        hover = HoverTool(tooltips=[('', '@{Total}')])
        hover.point_policy = 'follow_mouse'
        p_1.add_tools(hover)

        p_1.vbar(x='Team', top='Total', source=source, width=0.4, color=Spectral4[0])

        p_1.x_range.range_padding = 0.05
        p_1.xaxis.major_label_orientation = 1
        p_1.xaxis.major_label_text_font_size = "10pt"
        p_1.toolbar_location = None

        # Plot breakdown by match result: three dodged bars per team

        p_2 = figure(x_range=FactorRange(factors=teams), plot_height=600,
                     plot_width=950, tools='hover', tooltips='@$name',
                     title='Breakdown by Match Result')

        w = p_2.vbar(x=dodge('Team', -0.25, range=p_2.x_range), top='w',
                     width=0.2, source=source, color=Spectral4[1], name='w')
        d = p_2.vbar(x=dodge('Team', 0.0, range=p_2.x_range), top='d',
                     width=0.2, source=source, color=Spectral4[2], name='d')
        l = p_2.vbar(x=dodge('Team', 0.25, range=p_2.x_range), top='l',
                     width=0.2, source=source, color=Spectral4[3], name='l')

        legend_it = [('Won', [w]), ('Drew', [d]), ('Lost', [l])]
        legend = Legend(items=legend_it, location=(0, 360))

        p_2.add_layout(legend, 'right')
        p_2.title.text_font_size = '12pt'
        p_2.x_range.range_padding = 0.05
        p_2.xgrid.grid_line_color = None
        p_2.xaxis.major_label_text_font_size = "10pt"
        p_2.xaxis.major_label_orientation = 1
        p_2.toolbar_location = None

        return p_1, p_2

    # Update plots on changes

    def update(attrname, old, new):
        """Widget callback: rebuild both charts from the current widget state."""
        stat = select_stat.value
        agg_func = choose_agg_func.active
        p1, p2 = plot_team_stat(stat, agg_func)
        # Slot 0 of the layout is the widget row; the charts follow it.
        layout.children[1:] = [p1, p2]

    # Widgets
    select_stat = Select(title="Select a Stat for Comparison:", value="Goal",
                         options=list(teams_stats_df.columns)[1:-3])
    select_stat.on_change('value', update)

    choose_agg_func = RadioButtonGroup(labels=['Average per Match', 'Total'],
                                       active=0)
    choose_agg_func.on_change('active', update)

    # Wrap widgets
    widgets = widgetbox([select_stat, choose_agg_func])

    comparison_stat = select_stat.value
    agg_func_state = choose_agg_func.active

    # Arrange layout
    p1, p2 = plot_team_stat(comparison_stat, agg_func_state)
    layout = column(row(widgets), p1, p2)
    tab = Panel(child=layout, title='Basic Teams Stats')

    return tab


In [28]:
# Plot in-depth team stats

def attacks_origin_tab(team_stats_df):
    """Build the 'Attacks Origins' tab.

    For the selected team the tab shows a pie chart of attacks by origin
    and a bar chart of attacks that ended with a shot, followed by the same
    pair of charts for the attacks of the team's opponents.

    Args:
        team_stats_df: team stats frame. It must have 'Team' and 'Opponent'
            columns and the six '<Left|Right|Center> Flank Attacks[ With Shot]'
            columns.

    Returns:
        A bokeh ``Panel`` titled 'Attacks Origins'.
    """
    # Team shown when the tab opens, provided it is present in the data.
    DEFAULT_TEAM = 'Beitar Jerusalem'

    def create_ds_for_attacks_origin(team, with_shot=False, opp_attacks=False):
        """Return the attack counts of `team` split by origin.

        Args:
            team: team name to select.
            with_shot: use the '... With Shot' columns instead of the totals.
            opp_attacks: group by 'Opponent' instead of 'Team', i.e. sum the
                rows in which `team` appears as the opponent.

        Returns:
            A frame indexed by origin label ('Left Field', 'Right Field',
            'Center') with columns 'value' (summed count), 'angle' (share of
            the total, in radians of a full circle) and 'color'.
        """
        col_of_interest = 'Opponent' if opp_attacks else 'Team'

        cols = {'total': ['Left Flank Attacks',
                          'Right Flank Attacks',
                          'Center Flank Attacks'],
                'with_shot': ['Left Flank Attacks With Shot',
                              'Right Flank Attacks With Shot',
                              'Center Flank Attacks With Shot']}
        cols_map = {'Left Flank Attacks': 'Left Field',
                    'Right Flank Attacks': 'Right Field',
                    'Center Flank Attacks': 'Center',
                    'Left Flank Attacks With Shot': 'Left Field',
                    'Right Flank Attacks With Shot': 'Right Field',
                    'Center Flank Attacks With Shot': 'Center'}
        wanted = cols['with_shot'] if with_shot else cols['total']

        # Sum only the columns that are plotted; the frame also carries
        # unrelated (including text) columns that need not be aggregated.
        df = team_stats_df.groupby(by=col_of_interest)[wanted].sum()
        df = df.reset_index()

        data = df[df[col_of_interest] == team][wanted].reset_index(drop=True)
        data = data.rename(mapper=cols_map, axis=1)

        # One row per origin; the single selected row (label 0) becomes 'value'.
        ds = data.transpose().rename(columns={0: 'value'})
        ds['angle'] = ds['value']/ds['value'].sum() * 2*pi
        ds['color'] = Viridis[len(ds)]

        return ds

    def plot_attacks_by_origin(team, opp_attacks=False):
        """Plots data of attacks segmented by origin of attack.

        Total num of attacks in a pie chart. Attacks ended with a shot
        in bars.

        Returns:
            (pie chart figure, bar chart figure).
        """
        # Plot attacks in pie chart
        data_pc = create_ds_for_attacks_origin(team, with_shot=False, opp_attacks=opp_attacks)
        source_pc = ColumnDataSource(data_pc)

        pc = figure(plot_height=300, plot_width=300, title="Attacks Origins",
                    toolbar_location=None, tools="hover",
                    tooltips="@index: @value", x_range=(-0.5, 1))

        pc.wedge(x=0, y=1, radius=0.4,
                 start_angle=cumsum('angle', include_zero=True),
                 end_angle=cumsum('angle'), line_color="white",
                 fill_color='color', legend='index', source=source_pc)

        pc.axis.axis_label = None
        pc.axis.visible = False
        pc.grid.grid_line_color = None

        # Plot attacks ended with a shot
        data_with_shot = create_ds_for_attacks_origin(team, with_shot=True, opp_attacks=opp_attacks)
        attack_origin = ['Left Field', 'Center', 'Right Field']
        source = ColumnDataSource(data_with_shot)

        p = figure(plot_height=300, plot_width=300, title="Attacks Ended With a Shot",
                   toolbar_location=None, tools="hover",
                   tooltips="@index: @value", x_range=attack_origin)

        p.vbar(x='index', top='value', fill_color='color', width=0.5, source=source)

        p.grid.grid_line_color = None
        p.xaxis.major_label_text_font_size = "10pt"
        p.axis.axis_line_color = None
        p.xaxis.major_tick_line_color = None
        p.yaxis.minor_tick_line_color = None

        return pc, p

    def update_team(attrname, old, new):
        """Widget callback: redraw all four charts for the selected team."""
        team = select_team.value
        p1, p2 = plot_attacks_by_origin(team)
        p3, p4 = plot_attacks_by_origin(team, opp_attacks=True)
        # Layout slots 1 and 3 hold the chart rows; 0 and 2 hold the
        # selector and the separator.
        layout.children[1::2] = [row(p1, p2), row(p3, p4)]

    # Select-Team widget
    teams = sorted(list(team_stats_df['Team'].unique()))
    # Fall back to the first team when the default one is not in the data,
    # so the initial selection is always one of the offered options.
    if DEFAULT_TEAM in teams or not teams:
        initial_team = DEFAULT_TEAM
    else:
        initial_team = teams[0]
    select_team = Select(title='Select a Team', value=initial_team, options=teams)

    team = select_team.value
    select_team.on_change('value', update_team)

    # Separation paragraph
    counter_attack_sep = Div(text="<b>Opponents Attacks</b>", style={'font-size': '170%', 'color': 'grey'})

    # Arrange layout
    p1, p2 = plot_attacks_by_origin(team)
    p3, p4 = plot_attacks_by_origin(team, opp_attacks=True)
    layout = column(row(select_team), row(p1, p2),
                    counter_attack_sep, row(p3, p4))
    tab = Panel(child=layout, title='Attacks Origins')

    return tab

In [36]:
# Plot players performances

def players_performance_tab(player_info_df, player_stats_df):
    """Build the 'Players Performances' tab: a configurable scatter plot.

    Widgets filter the players by team and position and choose which stats
    drive the x axis, the y axis, the marker size and the marker colour.

    NOTE: the plotted rows come from the module-level ``joined_player_df``;
    the two arguments are only used to populate the widget options.

    Args:
        player_info_df: players info frame; its 'Team' values fill the
            team filter.
        player_stats_df: players stats frame; its column names (except
            'index', 'pid', 'Gameweek' and 'Season') fill the stat selectors.

    Returns:
        A bokeh ``Panel`` titled 'Players Performances'.
    """
    # Marker defaults and the discrete sizes / colours values are bucketed into
    SIZE = 8
    SIZES = list(range(5, 38, 4))
    COLOR = 'blueviolet'

    def bucket_values(series, choices, fallback):
        """Map every value of `series` to one entry of `choices`.

        When there are more distinct values than choices the values are cut
        into quantile buckets, otherwise each distinct value gets its own
        choice.

        Args:
            series: pandas Series of the values to bucket.
            choices: sequence of marker sizes or colours.
            fallback: entry used for missing values.

        Returns:
            A list with one entry per row of `series`.
        """
        if len(series.unique()) > len(choices):
            groups = pd.qcut(series.values, len(choices), duplicates='drop')
        else:
            groups = pd.Categorical(series.values)

        # Missing values are coded -1; without this guard they would index
        # the choices from the end and pick the last (largest) one.
        return [choices[code] if code >= 0 else fallback
                for code in groups.codes]

    def create_ds(team, positions):
        """Return the filtered player rows with 'Size' and 'Color' columns.

        Args:
            team: team name, or 'All' to keep every team.
            positions: list of position labels to keep.
        """
        keep = joined_player_df['Position'].isin(positions)
        if team != 'All':
            keep = keep & (joined_player_df['Team'] == team)
        # Explicit copy: columns are added below and must not touch the
        # shared frame.
        ds = joined_player_df[keep].copy()

        # Add sizes
        if size.value != 'None':
            ds['Size'] = bucket_values(ds[size.value], SIZES, SIZE)
        else:
            ds['Size'] = SIZE

        # Add colors
        if color.value != 'None':
            ds['Color'] = bucket_values(ds[color.value], Category20_20, COLOR)
        else:
            ds['Color'] = COLOR

        return ds

    def plot_stats():
        """Return the scatter figure for the current widget state."""
        pos = [positions[i] for i in select_position.active]
        team = select_team.value
        ds = create_ds(team, pos)
        source = ColumnDataSource(ds)

        p = figure(plot_height=600, plot_width=650, title=f'{x.value} vs {y.value}',
                   tools='pan,box_zoom,reset')

        p.circle(x=x.value, y=y.value, size='Size', color='Color',
                 alpha=0.5, source=source, hover_color='navy')

        # Column names are wrapped in braces because they may contain spaces.
        hover = HoverTool(tooltips=[('Player', '@Name'),
                                    ('Team', '@Team'),
                                    (f'{x.value}', f'@{{{x.value}}}'),
                                    (f'{y.value}', f'@{{{y.value}}}'),
                                    ('Opponent', '@Opponent')])

        if size.value != 'None':
            hover.tooltips.append((f'{size.value}', f'@{{{size.value}}}'))
        if color.value != 'None':
            hover.tooltips.append((f'{color.value}', f'@{{{color.value}}}'))
        hover.point_policy = 'follow_mouse'

        p.add_tools(hover)
        p.xaxis.axis_label = x.value
        p.yaxis.axis_label = y.value

        return p

    def update(attrname, old, new):
        """Widget callback: replace the figure (layout slot 1) with a new one."""
        layout.children[1] = plot_stats()

    # Data filtering widgets by Team and Position
    teams = ['All'] + sorted(list(player_info_df['Team'].unique()))
    select_team = Select(title='Filter by Team', value='All', options=teams)
    select_team.on_change('value', update)

    positions = ['GK', 'Defender', 'Midfielder', 'Forward']
    select_position = CheckboxButtonGroup(labels=positions, active=[0, 1, 2, 3])
    select_position.on_change('active', update)

    # Select stats to plot
    columns = [name for name in sorted(player_stats_df.columns)
               if name not in ['index', 'pid', 'Gameweek', 'Season']]
    x = Select(title='X Axis', value='Minutes', options=columns)
    x.on_change('value', update)
    y = Select(title='Y Axis', value='Passes', options=columns)
    y.on_change('value', update)
    size = Select(title='Add Size Dimension', value='None', options=['None']+columns)
    size.on_change('value', update)
    color = Select(title='Add Color Segmentation', value='None', options=['None', 'Result', 'Gameweek', 'Position'])
    color.on_change('value', update)

    widgets = widgetbox([select_team, select_position, x, y, size, color])

    layout = row(widgets, plot_stats())
    tab = Panel(child=layout, title='Players Performances')

    return tab
        

In [37]:
def modify_doc(doc):
    """Populate `doc` with the dashboard: one tab per view.

    Args:
        doc: the document object the tabbed layout is added to as a root.
    """
    panels = [
        basic_teams_stats_tab(team_stats_df),
        attacks_origin_tab(team_stats_df),
        players_performance_tab(player_info_df, player_stats_df),
    ]
    doc.add_root(Tabs(tabs=panels))

# Hand the document-building function itself (not a finished layout) to
# show(); modify_doc receives the document and adds the tabs to it.
show(modify_doc)

In [27]:
# Scratch check: display the three-colour variant of the Set2 palette.
Set2[3]

['#66c2a5', '#fc8d62', '#8da0cb']