# English Premier League 2021/22 Bar Chart Race

The aim of this project is to visualise the entire EPL 2021/22 season using an animated bar chart race.  

## 1. Import libraries

In [None]:
import bar_chart_race as bcr
import pandas as pd

## 2. Load raw dataset

We will be using data from [Football-Data](https://www.football-data.co.uk/), which is a "free football betting portal providing historical results & odds to help football betting enthusiasts analyse many years of data quickly". 

In [None]:
# Download the season's match results. `dayfirst=True` makes pandas read
# ambiguous dates in the `Date` column as day/month rather than month/day.
raw_df = pd.read_csv(
    "https://www.football-data.co.uk/mmz4281/2122/E0.csv",
    parse_dates=['Date'],
    dayfirst=True,
)

# Keep only the match-level columns used in this notebook. The explicit
# `.copy()` makes `raw_df` an independent DataFrame, so new columns can be
# assigned to it later without pandas raising a SettingWithCopyWarning.
raw_df = raw_df[['Date', 'Time', 'HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'FTR']].copy()
raw_df.tail()

In [None]:
# Preview the last five rows of the trimmed dataset
raw_df.tail(n=5)

## 3. Wrangle data into "wide" format

We need to wrangle the raw data in the `raw_df` DataFrame into the format that the `bar_chart_race` package requires. According to its [documentation](https://github.com/dexplo/bar_chart_race#:~:text=Must%20begin%20with%20a%20pandas%20DataFrame%20containing%20%27wide%27%20data%20where%3A), the data must be in "wide" format, where:  

- Every row represents a single period of time
- Each column holds the value for a particular category
- The index contains the time component (optional)

In [None]:
def get_hometeam_points(result):
    """Return the league points earned by the home team for a match result.

    `result` is the full-time result code: 'H' (home win), 'A' (away win)
    or 'D' (draw). The home team gets 3 points for a win, 1 point for a
    draw and 0 points for a loss. Any other value yields `None`.
    """
    # (result code, points for the home side)
    home_points_table = (('H', 3), ('A', 0), ('D', 1))
    for code, points in home_points_table:
        if result == code:
            return points
    return None
    
def get_awayteam_points(result):
    """Return the league points earned by the away team for a match result.

    `result` is the full-time result code: 'H' (home win), 'A' (away win)
    or 'D' (draw). The away team gets 3 points for a win, 1 point for a
    draw and 0 points for a loss. Any other value yields `None`.
    """
    # (result code, points for the away side)
    away_points_table = (('A', 3), ('H', 0), ('D', 1))
    for code, points in away_points_table:
        if result == code:
            return points
    return None
    
# Translate each match's full-time result (`FTR`) into the points earned by
# the home side and by the away side.
raw_df['HomeTeam_Points'] = raw_df['FTR'].map(get_hometeam_points)
raw_df['AwayTeam_Points'] = raw_df['FTR'].map(get_awayteam_points)
raw_df.tail()

In [None]:
# Keep just the date, the two team names and the two points columns
df1 = raw_df.loc[:, ['Date', 'HomeTeam', 'AwayTeam', 'HomeTeam_Points', 'AwayTeam_Points']]

# Unpivot (melt) the team columns: one row per team appearance, with all
# `HomeTeam` rows first and all `AwayTeam` rows after them
df2 = df1.melt(id_vars=['Date'], value_vars=['HomeTeam', 'AwayTeam'], value_name='Team')

# Unpivot the points columns the same way; the row order matches `df2`
# because the value columns are listed in the same home/away order
df3 = df1.melt(id_vars=['Date'], value_vars=['HomeTeam_Points', 'AwayTeam_Points'], value_name='Points')

# Glue the two long tables together column-wise; the resulting columns are
# Date, variable, Team, Date, variable, Points
df4 = pd.concat([df2, df3], axis=1)

# Pick `Date` (position 0), `Team` (position 2) and `Points` (position 5)
df5 = df4.iloc[:, [0, 2, 5]]

# Reshape to wide format (one row per match date, one column per team),
# treat dates on which a team did not play as 0 points, then total the
# points of each calendar week
df6 = (
    df5.pivot(index='Date', columns='Team', values='Points')
    .fillna(0)
    .reset_index()
    .groupby(pd.Grouper(key='Date', freq='W'))
    .sum()
)

# Running total of points per team, week by week
df7 = df6.cumsum()

df7

## 4. Create a bar chart race using `bar_chart_race` package

In [None]:
# help(bcr.bar_chart_race)

In [None]:
# Text styling for the period (date) label
period_label_style = {
    'ha': 'right',
    'va': 'center',
    'weight': 'semibold',
    'size': 35,
}

# One colour per bar.
# NOTE(review): these look like club colours listed in alphabetical team
# order, i.e. the column order produced by the pivot — confirm against
# `df7.columns` before changing the team set.
bar_colors = [
    '#EF0107', '#95BFE5', '#E30613', '#0057B8', '#6C1D45',
    '#034694', '#1B458F', '#003399', '#FFCD00', '#003090',
    '#C8102E', '#6CABDD', '#DA291C', '#241F20', '#00A650',
    '#D71920', '#132257', '#FBEE23', '#7A263A', '#FDB913',
]

# Chart title text and styling
title_style = {
    'label': 'The 2021/22 English Premier League Season in 30 Seconds',
    'size': 45,
    'weight': 'bold',
    'pad': 30,
}

# Font used for the value labels drawn on the bars
bar_label_style = {
    'size': 15,
    'family': 'DejaVu Sans',
    'color': '#FFFFFF',
    'weight': 'semibold',
}

# Font used for the axis tick labels
tick_label_style = {
    'size': 18,
    'family': 'DejaVu Sans',
    'color': '#7f7f7f',
}

# Render the cumulative points table as an animated GIF
bcr.bar_chart_race(
    df=df7,
    filename='../output/epl_2122_race.gif',
    n_bars=20,
    fixed_order=False,
    fixed_max=True,
    steps_per_period=30,
    period_length=700,
    interpolate_period=True,
    period_label=period_label_style,
    period_template="%B %Y",
    colors=bar_colors,
    title=title_style,
    bar_size=0.70,
    bar_textposition='inside',
    bar_label_font=bar_label_style,
    tick_label_font=tick_label_style,
    bar_kwargs={'alpha': 0.8},
    fig_kwargs={'figsize': (30, 16), 'dpi': 150},
    img_label_folder="../logos",
    tick_label_mode='mixed',
    writer='imagemagick',
)

## Useful resources:

- Data source: https://www.football-data.co.uk/englandm.php
- `bar_chart_race` documentation: https://www.dexplo.org/bar_chart_race/
- `bar_chart_race` GitHub page: https://github.com/dexplo/bar_chart_race
- Installation of ffmpeg: https://www.ffmpeg.org/download.html
- Installation of ImageMagick: https://imagemagick.org/