In [78]:
import requests
import pandas as pd
import os
import warnings
warnings.filterwarnings("ignore")

In [2]:
seasons = []
first = 1992
while first <= 2023:
    year = str(first) + "-" + str(first + 1)
    first = first + 1
    seasons.append(year)

In [3]:
url = "https://fbref.com/en/comps/8/1996-1997/schedule/1996-1997-Champions-League-Scores-and-Fixtures"

In [4]:
html = requests.get(url).content
df_list = pd.read_html(html)
league_table = df_list[0]

In [46]:
league_table[["Round", "Home", "Score", "Away"]]

Unnamed: 0,Round,Home,Score,Away
0,Group stage,Juventus it,1–0,eng Manchester Utd
1,Group stage,Atlético Madrid es,4–0,ro Steaua
2,Group stage,Dortmund de,2–1,pl Widzew Łódź
3,Group stage,Rapid Wien at,1–1,tr Fenerbahçe
4,Group stage,Grasshopper ch,3–0,sct Rangers
...,...,...,...,...
62,Semi-finals,Ajax nl,1–2,it Juventus
63,Semi-finals,Dortmund de,1–0,eng Manchester Utd
64,Semi-finals,Manchester Utd eng,0–1,de Dortmund
65,Semi-finals,Juventus it,4–1,nl Ajax


In [47]:
def split_score(score):
    try:
        home_score = score[0]
        away_score = score[-1]
        return pd.Series([int(home_score), int(away_score)])
    except:
        return pd.Series([None, None])

In [56]:
def clean_data(df):
    df["Home"] = df["Home"].str[:-3]
    df["Away"] = df["Away"].str[3:]
    df[["Home_Score", "Away_Score"]] = df["Score"].apply(split_score)

    df["Winner_team"] = df.apply(
        lambda row: (
            row["Home"]
            if row["Home_Score"] > row["Away_Score"]
            else (row["Away"] if row["Away_Score"] > row["Home_Score"] else "Draw")
        ),
        axis=1,
    )
    df["Winner_Home_Away"] = df.apply(
        lambda row: (
            "Home"
            if row["Home_Score"] > row["Away_Score"]
            else ("Away" if row["Away_Score"] > row["Home_Score"] else "Draw")
        ),
        axis=1,
    )
    return df

In [57]:
X = league_table[["Round", "Home", "Score", "Away"]]
X.head()

Unnamed: 0,Round,Home,Score,Away
0,Group stage,Juventus it,1–0,eng Manchester Utd
1,Group stage,Atlético Madrid es,4–0,ro Steaua
2,Group stage,Dortmund de,2–1,pl Widzew Łódź
3,Group stage,Rapid Wien at,1–1,tr Fenerbahçe
4,Group stage,Grasshopper ch,3–0,sct Rangers


In [58]:
X = X.dropna()

In [59]:
X = clean_data(X)
X.head()

Unnamed: 0,Round,Home,Score,Away,Home_Score,Away_Score,Winner_team,Winner_Home_Away
0,Group stage,Juventus,1–0,Manchester Utd,1,0,Juventus,Home
1,Group stage,Atlético Madrid,4–0,Steaua,4,0,Atlético Madrid,Home
2,Group stage,Dortmund,2–1,Widzew Łódź,2,1,Dortmund,Home
3,Group stage,Rapid Wien,1–1,Fenerbahçe,1,1,Draw,Draw
4,Group stage,Grasshopper,3–0,Rangers,3,0,Grasshopper,Home


In [91]:
def get_table(season):
    url = f"https://fbref.com/en/comps/8/{season}/schedule/{season}-Champions-League-Scores-and-Fixtures"
    try:
        html = requests.get(url).content
        df = pd.read_html(html)
        return df[0]
    except Exception as e:
        print(f"Error on {season}: {e}")
        pass


def select_cols(df):
    try:
        return df[["Round", "Home", "Score", "Away"]]
    except Exception as e:
        print(f"Error message: {e}")


def clean_data(df):
    try:
        df = df.dropna()
    except Exception as e:
        print(f"Error message: {e}")
        pass
    df.loc[:, "Home"] = df["Home"].str[:-3]
    df.loc[:, "Away"] = df["Away"].str[3:]
    df[["Home_Score", "Away_Score"]] = df["Score"].apply(split_score)

    df.loc[:, "Winner_team"] = df.apply(
        lambda row: (
            row["Home"]
            if row["Home_Score"] > row["Away_Score"]
            else (row["Away"] if row["Away_Score"] > row["Home_Score"] else "Draw")
        ),
        axis=1,
    )
    df.loc[:, "Winner_Home_Away"] = df.apply(
        lambda row: (
            "Home"
            if row["Home_Score"] > row["Away_Score"]
            else ("Away" if row["Away_Score"] > row["Home_Score"] else "Draw")
        ),
        axis=1,
    )
    return df


def save_csv(df, season):
    try:
        df.to_csv(f"./data/{season}.csv")
    except Exception as e:
        print(f"Error message: {e}")

In [92]:
Y = league_table[["Round", "Home", "Score", "Away"]]
Y.head()

Unnamed: 0,Round,Home,Score,Away
0,Group stage,Juventus it,1–0,eng Manchester Utd
1,Group stage,Atlético Madrid es,4–0,ro Steaua
2,Group stage,Dortmund de,2–1,pl Widzew Łódź
3,Group stage,Rapid Wien at,1–1,tr Fenerbahçe
4,Group stage,Grasshopper ch,3–0,sct Rangers


In [93]:
Y = clean_data(Y)
Y.head()

Unnamed: 0,Round,Home,Score,Away,Home_Score,Away_Score,Winner_team,Winner_Home_Away
0,Group stage,Juventus,1–0,Manchester Utd,1,0,Juventus,Home
1,Group stage,Atlético Madrid,4–0,Steaua,4,0,Atlético Madrid,Home
2,Group stage,Dortmund,2–1,Widzew Łódź,2,1,Dortmund,Home
3,Group stage,Rapid Wien,1–1,Fenerbahçe,1,1,Draw,Draw
4,Group stage,Grasshopper,3–0,Rangers,3,0,Grasshopper,Home


In [81]:
save_csv(Y,"AHSD")

In [87]:
Y.to_csv("./data/data.csv")

In [94]:
for season in seasons:
    print(season)
    try:
        df = get_table(season=season)
        df = select_cols(df)
        df = clean_data(df)
        save_csv(df, season)
    except Exception as e:
        print(f"Error message: {e}")
        pass

1992-1993
Error on 1992-1993: No tables found
Error message: 'NoneType' object is not subscriptable
Error message: 'NoneType' object has no attribute 'dropna'
Error message: 'NoneType' object is not subscriptable
1993-1994
Error on 1993-1994: No tables found
Error message: 'NoneType' object is not subscriptable
Error message: 'NoneType' object has no attribute 'dropna'
Error message: 'NoneType' object is not subscriptable
1994-1995
Error on 1994-1995: No tables found
Error message: 'NoneType' object is not subscriptable
Error message: 'NoneType' object has no attribute 'dropna'
Error message: 'NoneType' object is not subscriptable
1995-1996
Error on 1995-1996: No tables found
Error message: 'NoneType' object is not subscriptable
Error message: 'NoneType' object has no attribute 'dropna'
Error message: 'NoneType' object is not subscriptable
1996-1997
Error on 1996-1997: No tables found
Error message: 'NoneType' object is not subscriptable
Error message: 'NoneType' object has no attribut