In [1]:
import dash
from dash import html, dcc
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output, State 
import plotly.express as px
import altair as alt
import pandas as pd
import os
from dash_bootstrap_components import icons

In [3]:
# Load every season CSV from `data_path`, tag each row with its season label,
# parse the match dates and combine everything into one DataFrame `df`.
seasons = []
rows_total = 0
data_path = "../data/raw"  # Folder with one CSV per season, relative to the working directory

# Fail early with an actionable message if the data folder is missing.
if not os.path.isdir(data_path):
    raise FileNotFoundError(
        f"Directory '{data_path}' not found! "
        f"Expected it at '{os.path.abspath(data_path)}'."
    )

# os.listdir() returns entries in arbitrary order; sorting makes the load order
# (and therefore the row order of `df`) reproducible between runs.
for csv_name in sorted(os.listdir(data_path)):
    if not csv_name.lower().endswith(".csv"):
        continue
    full_path = os.path.join(data_path, csv_name)  # e.g. "../data/raw/0506.csv"
    try:
        season_df = pd.read_csv(full_path, on_bad_lines="warn", encoding="windows-1252")
    except FileNotFoundError:
        print(f"Couldn’t find {full_path} - skipping.")
        continue
    except Exception as e:
        # Best effort: report the unreadable file and carry on with the others.
        print(f"Error loading {full_path}: {e}")
        continue
    season_name = os.path.splitext(csv_name)[0]  # "0506.csv" -> "0506"
    season_df["Season"] = season_name[0:2] + "/" + season_name[2:4]  # "0506" -> "05/06"
    seasons.append(season_df)

if not seasons:
    # pd.concat([]) would raise a much less helpful "No objects to concatenate".
    raise ValueError(f"No season CSV files could be loaded from '{data_path}'.")

for season_df in seasons:
    rows_total += len(season_df)
    season_df["Date"] = pd.to_datetime(season_df["Date"], format="mixed", dayfirst=True)

df = pd.concat(seasons, axis=0, ignore_index=True).dropna(axis=1, how="all")
# "Season" is filled in for every row above, so a plain how="all" row drop could
# never remove anything. Ignore that column when looking for blank rows.
df = df.dropna(axis=0, how="all", subset=df.columns.difference(["Season"]))

if rows_total:
    print("%.2f %% of original data imported successfully" % (len(df) / rows_total * 100))
else:
    print("No rows found in the season files.")
print("%i rows dropped." % (rows_total - len(df)))

FileNotFoundError: Directory '../data/raw' not found! Make sure 'raw' is in the same folder as this script.

In [None]:
# Preview the first 30 rows of the combined DataFrame.
df.head(30)

Unnamed: 0,Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,...,1XBCH,1XBCD,1XBCA,BFECH,BFECD,BFECA,BFEC>2.5,BFEC<2.5,BFECAHH,BFECAHA
0,E0,2005-08-13,Aston Villa,Bolton,2.0,2.0,D,2.0,2.0,D,...,,,,,,,,,,
1,E0,2005-08-13,Everton,Man United,0.0,2.0,A,0.0,1.0,A,...,,,,,,,,,,
2,E0,2005-08-13,Fulham,Birmingham,0.0,0.0,D,0.0,0.0,D,...,,,,,,,,,,
3,E0,2005-08-13,Man City,West Brom,0.0,0.0,D,0.0,0.0,D,...,,,,,,,,,,
4,E0,2005-08-13,Middlesbrough,Liverpool,0.0,0.0,D,0.0,0.0,D,...,,,,,,,,,,
5,E0,2005-08-13,Portsmouth,Tottenham,0.0,2.0,A,0.0,1.0,A,...,,,,,,,,,,
6,E0,2005-08-13,Sunderland,Charlton,1.0,3.0,A,1.0,1.0,D,...,,,,,,,,,,
7,E0,2005-08-13,West Ham,Blackburn,3.0,1.0,H,0.0,1.0,A,...,,,,,,,,,,
8,E0,2005-08-14,Arsenal,Newcastle,2.0,0.0,H,0.0,0.0,D,...,,,,,,,,,,
9,E0,2005-08-14,Wigan,Chelsea,0.0,1.0,A,0.0,0.0,D,...,,,,,,,,,,


In [None]:
# Three-letter abbreviation for every team offered in the dashboard.
# Keys must match the spelling used in the HomeTeam / AwayTeam columns.
team_codes = {
    "Burnley": "BRN",
    "Huddersfield": "HUD",
    "Ipswich": "IPS",
    "Portsmouth": "POR",
    "Wigan": "WIG",
    "Nott'm Forest": "NFO",
    "Stoke": "STO",
    "Liverpool": "LIV",
    "Luton": "LUT",
    "Man City": "MCI",
    "Sunderland": "SUN",
    "Leeds": "LEE",
    "Swansea": "SWA",
    "Chelsea": "CHE",
    "QPR": "QPR",
    "Bournemouth": "BOU",
    "Watford": "WAT",
    "Derby": "DER",
    "Charlton": "CHA",
    "Bolton": "BOL",
    "Reading": "REA",
    "Brighton": "BHA",
    "Newcastle": "NEW",
    "Fulham": "FUL",
    "West Brom": "WBA",
    "Middlesbrough": "MID",  # was misspelled "Middlesborough", which never matched the data
    "Norwich": "NOR",
    "Birmingham": "BIR",
    "Blackburn": "BBR",
    "Everton": "EVE",
    "Tottenham": "TOT",
    "Hull": "HUL",
    "Cardiff": "CAR",
    "Aston Villa": "AVL",
    "Man United": "MUN",
    "Crystal Palace": "CRY",
    "Arsenal": "ARS",
    "Sheffield United": "SHU",
    "Southampton": "SOU",
    "Wolves": "WOL",
    "West Ham": "WHU",
    "Leicester": "LEI",
    "Brentford": "BRE",
    "Blackpool": "BLA"
}

# TODO: finish these! Only the teams listed here have a real colour so far;
# every other team falls back to black.
_colour_overrides = {
    "Liverpool": "darkred",
    "Tottenham": "navy",
    "Arsenal": "red",
}

# Chart colour per team, derived from team_codes so the two mappings always
# share exactly the same keys (and key order).
team_colours = {team: _colour_overrides.get(team, "black") for team in team_codes}

# Statistics selectable in the dashboard dropdown.
stats = ["Goals", "Wins"]


In [None]:
# Create the Dash application, loading the Bootstrap theme and the Bootstrap
# icon stylesheet from dash-bootstrap-components.
app = dash.Dash(
    external_stylesheets=[
        dbc.themes.BOOTSTRAP,
        dbc.icons.BOOTSTRAP,
    ],
)

In [None]:
# CSS max-height applied to each column in the top row of the layout.
colmaxht = "90%"

In [13]:
# Seasons present in the data, computed once and reused for both the checklist
# options and its initial (everything selected) value.
season_values = sorted(set(df["Season"]))

# Shared style for the two scrollable checklists. Dash style dictionaries use
# camelCased CSS property names (maxHeight, not max-height).
checklist_style = {"overflowY": "scroll", "maxHeight": "100%", "maxWidth": "100%"}

# Page layout: a statistic dropdown, team and season checklists and two pie
# charts in the top row, followed by two full-width timeline rows.
app.layout = dbc.Container([
    html.H1("Premier League Dashboard"),
    dbc.Row([
        dcc.Dropdown(
            id="stat-list",
            value="Goals",
            options=[{"label": stat, "value": stat} for stat in stats]
        ),
        dbc.Col([
            dcc.Checklist(
                id="teams-list",
                options=[{"label": team, "value": team} for team in sorted(team_codes)],
                style=checklist_style,
                value=["Arsenal", "Chelsea", "Liverpool", "Man United", "Tottenham", "Man City"]
            )
        ], md=2, style={"maxHeight": colmaxht}),
        dbc.Col([
            dcc.Checklist(
                id="seasons-list",
                options=[{"label": season, "value": season} for season in season_values],
                style=checklist_style,
                value=season_values
            )
        ], md=2, style={"maxHeight": colmaxht}),
        dbc.Col([
            dcc.Graph(id="pie1")
        ], md=4, style={"maxHeight": colmaxht}),
        dbc.Col([
            dcc.Graph(id="pie2")
        ], md=4, style={"maxHeight": colmaxht}),
    ], style={"height": "60%", "marginBottom": "60px"}),
    dbc.Row([
        dcc.Graph(id="timeline1")
    ], style={"height": "25%", "marginTop": "20px", "marginBottom": "90px"}),
    dbc.Row([
        dcc.Graph(id="timeline2")
    ], style={"height": "25%"})
], fluid=True, style={"height": "95vh"})

# Set up callbacks / backend
# ---------------------------------

def _team_match_rows(matches, teams, seasons):
    """Return one row per selected team per match played in the chosen seasons.

    Each match contributes up to two rows (one for the home side, one for the
    away side), so a fixture between two selected teams is counted for both.

    Args:
        matches: DataFrame with Season, Date, HomeTeam, AwayTeam, FTHG and FTAG columns.
        teams: Iterable of team names to keep.
        seasons: Iterable of season labels to keep.

    Returns:
        DataFrame with columns Season, Date, Team, Goals (goals scored by Team)
        and Won (True when Team scored more than its opponent).
    """
    in_season = matches[matches["Season"].isin(seasons)]
    sides = []
    for team_col, scored_col, conceded_col in (
        ("HomeTeam", "FTHG", "FTAG"),
        ("AwayTeam", "FTAG", "FTHG"),
    ):
        sides.append(pd.DataFrame({
            "Season": in_season["Season"],
            "Date": in_season["Date"],
            "Team": in_season[team_col],
            "Goals": in_season[scored_col],
            # Comparisons involving missing scores evaluate to False (not a win).
            "Won": in_season[scored_col] > in_season[conceded_col],
        }))
    team_rows = pd.concat(sides, ignore_index=True)
    return team_rows[team_rows["Team"].isin(teams)]


def _stat_figures(totals, secondary, by_season, by_date,
                  team_col, value_col, secondary_col, titles):
    """Build the two pie charts and two timelines shared by every statistic.

    Args:
        totals: Per-team totals (columns team_col, value_col) for the first pie.
        secondary: Per-team values (columns team_col, secondary_col) for the second pie.
        by_season: Per-season, per-team values (Season, team_col, value_col).
        by_date: Per-date, per-team values (Date, team_col, value_col).
        team_col: Name of the column holding the team.
        value_col: Name of the column holding the statistic.
        secondary_col: Name of the value column used by the second pie.
        titles: Four chart titles in the order pie1, pie2, timeline1, timeline2.

    Returns:
        Tuple (pie1, pie2, timeline1, timeline2) of Plotly figures.
    """
    pie1_title, pie2_title, timeline1_title, timeline2_title = titles

    pie1 = px.pie(
        totals,
        values=value_col,
        names=team_col,
        title=pie1_title,
        color=team_col,
        color_discrete_map=team_colours
    )
    pie2 = px.pie(
        secondary,
        values=secondary_col,
        names=team_col,
        title=pie2_title,
        color=team_col,
        color_discrete_map=team_colours
    )

    timeline1 = px.line(
        by_season,
        x="Season",
        y=value_col,
        color=team_col,
        title=timeline1_title,
        color_discrete_map=team_colours,
        height=300
    )
    timeline1.update_traces(opacity=0.5)

    timeline2 = px.scatter(
        by_date,
        x="Date",
        y=value_col,
        color=team_col,
        title=timeline2_title,
        color_discrete_map=team_colours,
        height=300
    )
    timeline2.update_traces(opacity=0.5, mode="markers")

    return pie1, pie2, timeline1, timeline2


@app.callback(
    Output("pie1", "figure"),
    Output("pie2", "figure"),
    Output("timeline1", "figure"),
    Output("timeline2", "figure"),
    Input("teams-list", "value"),
    Input("seasons-list", "value"),
    Input("stat-list", "value")
)
def plot_plotly(teamslist, seasonslist, statlist):
    """Redraw all four charts for the selected teams, seasons and statistic.

    Args:
        teamslist: Team names ticked in the "teams-list" checklist.
        seasonslist: Season labels ticked in the "seasons-list" checklist.
        statlist: Value of the "stat-list" dropdown ("Goals" or "Wins").

    Returns:
        Tuple (pie1, pie2, timeline1, timeline2) of Plotly figures.

    Raises:
        dash.exceptions.PreventUpdate: When no known statistic is selected
            (for example the dropdown was cleared), so the current figures
            are left untouched instead of failing on unset variables.
    """
    # Treat a missing selection as "nothing selected" rather than failing in isin().
    team_rows = _team_match_rows(df, teamslist or [], seasonslist or [])

    if statlist == "Goals":
        goals = team_rows.rename(columns={"Goals": "Team_Goals"})
        return _stat_figures(
            totals=goals.groupby("Team")["Team_Goals"].sum().reset_index(),
            secondary=goals.groupby("Team")["Team_Goals"].mean().reset_index(),
            by_season=goals.groupby(["Season", "Team"])["Team_Goals"].sum().reset_index(),
            by_date=goals.groupby(["Date", "Team"])["Team_Goals"].sum().reset_index(),
            team_col="Team",
            value_col="Team_Goals",
            secondary_col="Team_Goals",
            titles=(
                "Total Goals by Team",
                "Average Goals per Match by Team",
                "Goals per Season",
                "Goals Over Time",
            ),
        )

    if statlist == "Wins":
        played = team_rows.rename(columns={"Team": "Winner"})
        won = played[played["Won"]]
        wins_by_team = won.groupby("Winner").size().reset_index(name="Wins")

        # Win percentage = wins / matches actually played by that team.
        matches_played = played.groupby("Winner").size().reset_index(name="Matches")
        win_pct = wins_by_team.merge(matches_played, on="Winner")
        win_pct["Win_Percentage"] = (win_pct["Wins"] / win_pct["Matches"]) * 100

        return _stat_figures(
            totals=wins_by_team,
            secondary=win_pct,
            by_season=won.groupby(["Season", "Winner"]).size().reset_index(name="Wins"),
            by_date=won.groupby(["Date", "Winner"]).size().reset_index(name="Wins"),
            team_col="Winner",
            value_col="Wins",
            secondary_col="Win_Percentage",
            titles=(
                "Total Wins by Team",
                "Win Percentage by Team",
                "Wins per Season",
                "Wins Over Time",
            ),
        )

    # Dropdown cleared (value is None) or an unknown statistic: keep the current figures.
    raise dash.exceptions.PreventUpdate

if __name__ == "__main__":
    # Prefer `app.run` where the installed Dash provides it; fall back to the
    # legacy `run_server` name on versions that predate it.
    start_server = getattr(app, "run", None) or app.run_server
    start_server(debug=True)