# English Premier League 2021/22 Bar Chart Race

## 1. Import libraries

In [1]:
import bar_chart_race as bcr
import pandas as pd
import time

## 2. Load raw dataset

We will be using data from [Football-Data](https://www.football-data.co.uk/), which is a "free football betting portal providing historical results & odds to help football betting enthusiasts analyse many years of data quickly". 

In [2]:
raw_df = pd.read_csv("https://www.football-data.co.uk/mmz4281/2122/E0.csv", parse_dates=['Date'], dayfirst=True)
raw_df = raw_df[['Date', 'Time', 'HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'FTR']]
raw_df.tail()

Unnamed: 0,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR
365,2022-05-16,20:00,Newcastle,Arsenal,2,0,H
366,2022-05-17,19:45,Southampton,Liverpool,1,2,A
367,2022-05-19,19:45,Everton,Crystal Palace,3,2,H
368,2022-05-19,20:00,Aston Villa,Burnley,1,1,D
369,2022-05-19,20:00,Chelsea,Leicester,1,1,D


In [3]:
raw_df.tail()

Unnamed: 0,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR
365,2022-05-16,20:00,Newcastle,Arsenal,2,0,H
366,2022-05-17,19:45,Southampton,Liverpool,1,2,A
367,2022-05-19,19:45,Everton,Crystal Palace,3,2,H
368,2022-05-19,20:00,Aston Villa,Burnley,1,1,D
369,2022-05-19,20:00,Chelsea,Leicester,1,1,D


## 3. Wrangle data into "wide" format

We need to wrangle the raw data in `raw_df` DataFrame into a format that is necessary before the `bar_chart_race` package can be used. According to its [documentation](https://github.com/dexplo/bar_chart_race#:~:text=Must%20begin%20with%20a%20pandas%20DataFrame%20containing%20%27wide%27%20data%20where%3A), the data must be in the following format:  

- Every row represents a single period of time
- Each column holds the value for a particular category
- The index contains the time component (optional)

In [4]:
def get_hometeam_points(result):
    """Compute points awarded given a particular result:
    - 3 points for a win;
    - 1 point for a draw;
    - 0 point for a loss.
    """
    if result == 'H':
        return 3
    elif result == 'A':
        return 0
    elif result == 'D':
        return 1
    
def get_awayteam_points(result):
    """Compute points awarded given a particular result:
    - 3 points for a win;
    - 1 point for a draw;
    - 0 point for a loss.
    """
    if result == 'A':
        return 3
    elif result == 'H':
        return 0
    elif result == 'D':
        return 1
    
raw_df.loc[:, 'HomeTeam_Points'] = raw_df['FTR'].apply(get_hometeam_points)
raw_df.loc[:, 'AwayTeam_Points'] = raw_df['FTR'].apply(get_awayteam_points)
raw_df.tail()

Unnamed: 0,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HomeTeam_Points,AwayTeam_Points
365,2022-05-16,20:00,Newcastle,Arsenal,2,0,H,3,0
366,2022-05-17,19:45,Southampton,Liverpool,1,2,A,0,3
367,2022-05-19,19:45,Everton,Crystal Palace,3,2,H,3,0
368,2022-05-19,20:00,Aston Villa,Burnley,1,1,D,1,1
369,2022-05-19,20:00,Chelsea,Leicester,1,1,D,1,1


In [5]:
# Select relevant columns only
df1 = raw_df[['Date', 'HomeTeam', 'AwayTeam', 'HomeTeam_Points', 'AwayTeam_Points']]

# Pivot `df1` so each row correspond to each team
df2 = pd.melt(df1, id_vars=['Date'], value_vars=['HomeTeam', 'AwayTeam'], value_name='Team')

# Pivot `df1` so each row correspond to each point
df3 = pd.melt(df1, id_vars=['Date'], value_vars=['HomeTeam_Points', 'AwayTeam_Points'], value_name='Points')

# Put `df2` and `df3` side by side
df4 = pd.concat([df2, df3], axis=1)

# Select `Date`, `Team` and `Points` columns only 
df5 = df4.iloc[:, [0, 2, 5]]

# Pivot `df5`, so teams are in columns and rows are each timestamp
df6 = pd.pivot(df5, index='Date', columns='Team', values='Points')

# Fill NaN with 0
df6.fillna(0, inplace=True)

# Get `Date` from the index
df6.reset_index(inplace=True) 

# Group points by week
df6 = df6.groupby(pd.Grouper(key='Date', freq='W')).sum()

# Do a cumulative sum
df7 = df6.cumsum()

df7

Team,Arsenal,Aston Villa,Brentford,Brighton,Burnley,Chelsea,Crystal Palace,Everton,Leeds,Leicester,Liverpool,Man City,Man United,Newcastle,Norwich,Southampton,Tottenham,Watford,West Ham,Wolves
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2021-08-15,0.0,0.0,3.0,3.0,0.0,3.0,0.0,3.0,0.0,3.0,3.0,0.0,3.0,0.0,0.0,0.0,3.0,3.0,3.0,0.0
2021-08-22,0.0,3.0,4.0,6.0,0.0,6.0,1.0,4.0,1.0,3.0,6.0,3.0,4.0,0.0,0.0,1.0,6.0,3.0,3.0,0.0
2021-08-29,0.0,4.0,5.0,6.0,1.0,7.0,2.0,7.0,2.0,6.0,7.0,6.0,7.0,1.0,0.0,2.0,9.0,3.0,7.0,0.0
2021-09-05,0.0,4.0,5.0,6.0,1.0,7.0,2.0,7.0,2.0,6.0,7.0,6.0,7.0,1.0,0.0,2.0,9.0,3.0,7.0,0.0
2021-09-12,3.0,4.0,5.0,9.0,1.0,10.0,5.0,7.0,2.0,6.0,10.0,9.0,10.0,1.0,0.0,3.0,9.0,3.0,8.0,3.0
2021-09-19,6.0,7.0,8.0,12.0,1.0,13.0,5.0,10.0,3.0,6.0,13.0,10.0,13.0,2.0,0.0,4.0,9.0,6.0,8.0,3.0
2021-09-26,9.0,10.0,9.0,12.0,2.0,13.0,5.0,13.0,3.0,7.0,14.0,13.0,13.0,3.0,0.0,4.0,9.0,7.0,11.0,6.0
2021-10-03,10.0,10.0,12.0,14.0,3.0,16.0,7.0,14.0,6.0,8.0,15.0,14.0,14.0,3.0,1.0,4.0,12.0,7.0,11.0,9.0
2021-10-10,10.0,10.0,12.0,14.0,3.0,16.0,7.0,14.0,6.0,8.0,15.0,14.0,14.0,3.0,1.0,4.0,12.0,7.0,11.0,9.0
2021-10-17,10.0,10.0,12.0,15.0,3.0,19.0,7.0,14.0,6.0,11.0,18.0,17.0,14.0,3.0,2.0,7.0,15.0,7.0,14.0,12.0


## 4. Create a bar chart race using `bar_chart_race` package

In [6]:
# help(bcr.bar_chart_race)

In [8]:
start = time.time()

bcr.bar_chart_race(
    df=df7, 
    filename='../output/epl_2122_race.mp4',
    n_bars=20, 
    fixed_order=False,
    fixed_max=True,
    steps_per_period=30, 
    period_length=700,
    interpolate_period=True,
    period_label={
        'ha': 'right',
        'va': 'center', 
        'weight': 'semibold',
        'size': 35
    },
    period_template="%B %Y",
    colors=[
            '#EF0107', '#95BFE5', '#E30613', '#0057B8', '#6C1D45', 
            '#034694', '#1B458F', '#003399', '#FFCD00', '#003090',
            '#C8102E', '#6CABDD', '#DA291C', '#241F20', '#00A650', 
            '#D71920', '#132257', '#FBEE23', '#7A263A', '#FDB913'
        ],
    title={
        'label': 'The 2021/22 English Premier League Season in 30 Seconds',
        'size': 45,
        'weight': 'bold',
        'pad': 30
    },
    bar_size=0.70,
    bar_textposition='inside',
    bar_label_font={
        'size': 15,
        'family': 'DejaVu Sans',
        'color': '#FFFFFF',
        'weight': 'semibold'
    }, 
    tick_label_font={
        'size': 18,
        'family': 'DejaVu Sans',
        'color': '#7f7f7f',
    }, 
    bar_kwargs={
        'alpha': 0.8,
    }, 
    fig_kwargs={
        'figsize': (30, 16),
        'dpi': 150,
    }, 
    img_label_folder="../logos", 
    tick_label_mode='mixed', 
)

print(f"Completed in {round((end - start)/60, 2)} mins!")

NameError: name 'end' is not defined

## Useful resources:

- asfda
- asfda
- asdfa