In [342]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import pickle

In [343]:
df = pd.read_csv('champions_league_history.csv')

In [344]:
df

Unnamed: 0,home,score,away,year
0,Sporting CP,3–3,Partizan,1955–56
1,Vörös Lobogó,6–3,Anderlecht,1955–56
2,Servette,0–2,Real Madrid,1955–56
3,Rot-Weiss Essen,0–4,Hibernian,1955–56
4,Djurgården,0–0,Gwardia Warsaw,1955–56
...,...,...,...,...
8040,Arsenal,0–1,Paris Saint-Germain,2024–25
8041,Paris Saint-Germain,2–1,Arsenal,2024–25
8042,Barcelona,3–3,Inter Milan,2024–25
8043,Inter Milan,4–3 (a.e.t.),Barcelona,2024–25


### Cleaning Data

In [345]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8045 entries, 0 to 8044
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   home    8045 non-null   object
 1   score   8045 non-null   object
 2   away    8045 non-null   object
 3   year    8045 non-null   object
dtypes: object(4)
memory usage: 251.5+ KB


In [346]:
## Removing non integer values forn the score column

df['score'] = df['score'].str.replace('(a.e.t.)', '')
df['score'] = df['score'].str.replace('(a.e.t./g.g.)', '')

In [347]:
df['home'] = df['home'].str.strip()
df['score'] = df['score'].str.strip()
df['away'] = df['away'].str.strip()

In [348]:
## Creating score columns for both home and away teams

df[['HomeScore', 'AwayScore']] = df['score'].str.split('–', expand=True)

In [349]:
## Deleting the score column
df.drop('score', axis=1, inplace=True)

In [350]:
df.head()

Unnamed: 0,home,away,year,HomeScore,AwayScore
0,Sporting CP,Partizan,1955–56,3,3
1,Vörös Lobogó,Anderlecht,1955–56,6,3
2,Servette,Real Madrid,1955–56,0,2
3,Rot-Weiss Essen,Hibernian,1955–56,0,4
4,Djurgården,Gwardia Warsaw,1955–56,0,0


In [351]:
## Renaming columns

df.rename(columns={'home' : 'HomeTeam', 'away' : 'AwayTeam', 'year' : 'Season'}, inplace=True)

In [352]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8045 entries, 0 to 8044
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   HomeTeam   8045 non-null   object
 1   AwayTeam   8045 non-null   object
 2   Season     8045 non-null   object
 3   HomeScore  8045 non-null   object
 4   AwayScore  8041 non-null   object
dtypes: object(5)
memory usage: 314.4+ KB


In [353]:
## Dropping rows with non integer values in the score columns

df.drop(df[df['HomeScore'].str.contains('[^\d]') | df['AwayScore'].str.contains('[^\d]')].index, inplace=True)


  df.drop(df[df['HomeScore'].str.contains('[^\d]') | df['AwayScore'].str.contains('[^\d]')].index, inplace=True)
  df.drop(df[df['HomeScore'].str.contains('[^\d]') | df['AwayScore'].str.contains('[^\d]')].index, inplace=True)


In [354]:
df[df['HomeScore'].str.contains('[^\d]')]

  df[df['HomeScore'].str.contains('[^\d]')]


Unnamed: 0,HomeTeam,AwayTeam,Season,HomeScore,AwayScore


In [355]:
## Changing the datatype of the score columns to integer

df = df.astype({'HomeScore': int, 'AwayScore': int})

In [356]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 8020 entries, 0 to 8044
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   HomeTeam   8020 non-null   object
 1   AwayTeam   8020 non-null   object
 2   Season     8020 non-null   object
 3   HomeScore  8020 non-null   int64 
 4   AwayScore  8020 non-null   int64 
dtypes: int64(2), object(3)
memory usage: 375.9+ KB


In [357]:
df['Total'] = df['AwayScore'] + df['HomeScore']

In [358]:
home_df = df[['HomeTeam', 'HomeScore', 'AwayScore']]
away_df = df[['AwayTeam', 'AwayScore', 'HomeScore']]

In [359]:
home_df.rename(columns={'HomeTeam': 'Team', 'HomeScore': 'GoalsScored', 'AwayScore': 'GoalsConceded'}, inplace=True)
away_df.rename(columns={'AwayTeam': 'Team', 'AwayScore': 'GoalsScored', 'HomeScore': 'GoalsConceded'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  home_df.rename(columns={'HomeTeam': 'Team', 'HomeScore': 'GoalsScored', 'AwayScore': 'GoalsConceded'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  away_df.rename(columns={'AwayTeam': 'Team', 'AwayScore': 'GoalsScored', 'HomeScore': 'GoalsConceded'}, inplace=True)


In [360]:
## sorting the dataframes by team names

away_df = away_df.sort_values('Team')
home_df = home_df.sort_values('Team')

In [361]:
away_df.head()

Unnamed: 0,Team,GoalsScored,GoalsConceded
2364,1. FC Kaiserslautern,2,1
2369,1. FC Kaiserslautern,1,2
2363,1. FC Kaiserslautern,0,0
491,1. FC Köln,1,1
510,1. FC Köln,0,0


### Calculating Champions league Teams Strength

In [362]:
## Calculating champions league Goal scoring strength

league_average_goal_scoring_per_match = (df['Total'].sum() / df['Total'].value_counts().sum())

In [363]:
league_average_goal_scoring_per_match

np.float64(2.8634663341645887)

In [364]:
## Merging home_df and away_df to get the average goals scored and conceded by each team

data = pd.concat([home_df, away_df], ignore_index=True).groupby('Team').mean().reset_index()
data

Unnamed: 0,Team,GoalsScored,GoalsConceded
0,1. FC Kaiserslautern,2.000000,1.000000
1,1. FC Köln,1.250000,1.250000
2,1. FC Nürnberg,2.500000,1.500000
3,17 Nëntori,0.875000,1.500000
4,1860 Munich,2.166667,1.166667
...,...,...,...
590,Śląsk Wrocław,0.833333,2.000000
591,Široki Brijeg,0.666667,0.833333
592,Žalgiris,0.961538,1.269231
593,Željezničar,0.812500,1.937500


In [365]:
## Calculating scoring and conceding strength for each team

data['ScoringStrength'] = data['GoalsScored'] / league_average_goal_scoring_per_match
data['ConcedingStrength'] = data['GoalsConceded'] / league_average_goal_scoring_per_match
data

Unnamed: 0,Team,GoalsScored,GoalsConceded,ScoringStrength,ConcedingStrength
0,1. FC Kaiserslautern,2.000000,1.000000,0.698454,0.349227
1,1. FC Köln,1.250000,1.250000,0.436534,0.436534
2,1. FC Nürnberg,2.500000,1.500000,0.873068,0.523841
3,17 Nëntori,0.875000,1.500000,0.305574,0.523841
4,1860 Munich,2.166667,1.166667,0.756659,0.407432
...,...,...,...,...,...
590,Śląsk Wrocław,0.833333,2.000000,0.291023,0.698454
591,Široki Brijeg,0.666667,0.833333,0.232818,0.291023
592,Žalgiris,0.961538,1.269231,0.335795,0.443250
593,Željezničar,0.812500,1.937500,0.283747,0.676627


### Function to predict points and scores

In [366]:
def predict_points_and_scores(home_team, away_team):
    home_team_data = data[data['Team'] == home_team]
    away_team_data = data[data['Team'] == away_team]

    if home_team in data['Team'].values and away_team not in data['Team'].values:
        return {
                'Expected Home Goals': 3,
                'Expected Away Goals': 0,
                'Predicted Points': (3, 0)
            }    
    if home_team not in data['Team'].values and away_team in data['Team'].values:
        return {
                'Expected Home Goals': 0,
                'Expected Away Goals': 3,
                'Predicted Points': (0, 3)
            }    
    if home_team not in data['Team'].values and away_team not in data['Team'].values:
        return {
                'Expected Home Goals': 1,
                'Expected Away Goals': 1,
                'Predicted Points': (1, 1)
            }    
    
    home_scoring_strength = data.loc[data['Team'] == home_team, 'ScoringStrength'].values[0]
    home_conceding_strength = data.loc[data['Team'] == home_team, 'ConcedingStrength'].values[0]
    away_scoring_strength = data.loc[data['Team'] == away_team, 'ScoringStrength'].values[0]
    away_conceding_strength = data.loc[data['Team'] == away_team, 'ConcedingStrength'].values[0]

    expected_home_goals = league_average_goal_scoring_per_match * home_scoring_strength * away_conceding_strength
    expected_away_goals = league_average_goal_scoring_per_match * away_scoring_strength * home_conceding_strength


    if (expected_home_goals - expected_away_goals) > 0.3:
        predicted_points = (3, 0)
    elif (expected_away_goals - expected_home_goals) > 0.3:
        predicted_points = (0, 3)
    else:
        predicted_points = (1, 1)

    return {
        'Expected Home Goals': expected_home_goals.round(0).astype(int),
        'Expected Away Goals': expected_away_goals.round(0).astype(int),
        'Predicted Points': predicted_points
    }

### Testing the Model

In [367]:
dfg = predict_points_and_scores('Real Madrid', 'Atletic Bilbao')
dfg

{'Expected Home Goals': 3,
 'Expected Away Goals': 0,
 'Predicted Points': (3, 0)}

### Scraping 2025-2026 Champions League Fixtures

In [368]:
!pip install lxml beautifulsoup4 requests



In [369]:


def get_tables(url):

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                    "AppleWebKit/537.36 (KHTML, like Gecko) "
                    "Chrome/91.0.4472.124 Safari/537.36"
    }

    response = requests.get(url, headers=headers)

    if response.status_code == 200:
        dfs = pd.read_html(response.text)
        return dfs
    else:
        print("❌ Failed, status:", response.status_code)
        return None
    
    
url = "https://en.wikipedia.org/wiki/2025-26_UEFA_Champions_League"

tables = get_tables(url)

  dfs = pd.read_html(response.text)


### standings tables with all the teams

In [370]:
standings = tables[12]

In [371]:
standings.head()

Unnamed: 0,Pos,Teamvte,Pld,W,D,L,GF,GA,GD,Pts,Qualification
0,1,Ajax,0,0,0,0,0,0,0,0,Advance to round of 16 (seeded)
1,2,Arsenal,0,0,0,0,0,0,0,0,Advance to round of 16 (seeded)
2,3,Atalanta,0,0,0,0,0,0,0,0,Advance to round of 16 (seeded)
3,4,Athletic Bilbao,0,0,0,0,0,0,0,0,Advance to round of 16 (seeded)
4,5,Atlético Madrid,0,0,0,0,0,0,0,0,Advance to round of 16 (seeded)


In [372]:
standings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36 entries, 0 to 35
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Pos            36 non-null     int64 
 1   Teamvte        36 non-null     object
 2   Pld            36 non-null     int64 
 3   W              36 non-null     int64 
 4   D              36 non-null     int64 
 5   L              36 non-null     int64 
 6   GF             36 non-null     int64 
 7   GA             36 non-null     int64 
 8   GD             36 non-null     int64 
 9   Pts            36 non-null     int64 
 10  Qualification  24 non-null     object
dtypes: int64(9), object(2)
memory usage: 3.2+ KB


In [373]:
standings.drop('Qualification', axis=1, inplace=True)

In [374]:
standings.head()

Unnamed: 0,Pos,Teamvte,Pld,W,D,L,GF,GA,GD,Pts
0,1,Ajax,0,0,0,0,0,0,0,0
1,2,Arsenal,0,0,0,0,0,0,0,0
2,3,Atalanta,0,0,0,0,0,0,0,0
3,4,Athletic Bilbao,0,0,0,0,0,0,0,0
4,5,Atlético Madrid,0,0,0,0,0,0,0,0


In [375]:
standings.rename(columns={'Teamvte': 'Team'}, inplace=True)

In [376]:
standings.head()

Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts
0,1,Ajax,0,0,0,0,0,0,0,0
1,2,Arsenal,0,0,0,0,0,0,0,0
2,3,Atalanta,0,0,0,0,0,0,0,0
3,4,Athletic Bilbao,0,0,0,0,0,0,0,0
4,5,Atlético Madrid,0,0,0,0,0,0,0,0


In [377]:
with open('standings', 'wb') as f:
    pickle.dump(standings, f)

#### Loading the draws and standings table 

In [378]:
## Draws table

with open('draws', 'rb') as f:
    draws = pickle.load(f)

In [379]:
draws[0].head()

Unnamed: 0,Home team,Score,Away team
0,Athletic Bilbao,16 Sep,Arsenal
1,PSV Eindhoven,16 Sep,Union Saint-Gilloise
2,Juventus,16 Sep,Borussia Dortmund
3,Real Madrid,16 Sep,Marseille
4,Benfica,16 Sep,Qarabağ


In [380]:
## Standings table

with open('standings', 'rb') as f:
    standings = pickle.load(f)

In [381]:
standings

Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts
0,1,Ajax,0,0,0,0,0,0,0,0
1,2,Arsenal,0,0,0,0,0,0,0,0
2,3,Atalanta,0,0,0,0,0,0,0,0
3,4,Athletic Bilbao,0,0,0,0,0,0,0,0
4,5,Atlético Madrid,0,0,0,0,0,0,0,0
5,6,Barcelona,0,0,0,0,0,0,0,0
6,7,Bayer Leverkusen,0,0,0,0,0,0,0,0
7,8,Bayern Munich,0,0,0,0,0,0,0,0
8,9,Benfica,0,0,0,0,0,0,0,0
9,10,Bodø/Glimt,0,0,0,0,0,0,0,0


#### Function to Update draws and the standings tables

In [382]:
def update_tables(data, standings):
    
    for index, row in data.iterrows():
        home_team = row['Home team']
        away_team = row['Away team']
        
        # Predicting the match results
        result = predict_points_and_scores(home_team, away_team)

        ## Updating the standings table with the predicted points
        standings.loc[standings['Team'] == home_team, 'Pts'] += result['Predicted Points'][0]
        standings.loc[standings['Team'] == away_team, 'Pts'] += result['Predicted Points'][1]

        ## Updating the goals for, goals against and goal difference columns in the standings table
        standings.loc[standings['Team'] == home_team, 'GF'] += result['Expected Home Goals']
        standings.loc[standings['Team'] == away_team, 'GF'] += result['Expected Away Goals']
        standings.loc[standings['Team'] == home_team, 'GA'] += result['Expected Away Goals']
        standings.loc[standings['Team'] == away_team, 'GA'] += result['Expected Home Goals']
        standings.loc[standings['Team'] == home_team, 'GD'] += result['Expected Home Goals'] - result['Expected Away Goals']
        standings.loc[standings['Team'] == away_team, 'GD'] += result['Expected Away Goals'] - result['Expected Home Goals']


        ## Updating the played, won, drawn and lost columns in the standings table
        standings.loc[standings['Team'] == home_team, 'Pld'] += 1
        standings.loc[standings['Team'] == away_team, 'Pld'] += 1

        standings.loc[standings['Team'] == home_team, 'W'] += 1 if result['Predicted Points'][0] == 3 else 0
        standings.loc[standings['Team'] == away_team, 'W'] += 1 if result['Predicted Points'][1] == 3 else 0
        standings.loc[standings['Team'] == home_team, 'D'] += 1 if result['Predicted Points'][0] == 1 else 0
        standings.loc[standings['Team'] == away_team, 'D'] += 1 if result['Predicted Points'][1] == 1 else 0
        standings.loc[standings['Team'] == home_team, 'L'] += 1 if result['Predicted Points'][0] == 0 else 0
        standings.loc[standings['Team'] == away_team, 'L'] += 1 if result['Predicted Points'][1] == 0 else 0

        ## Updating the score column in the draws table
        data.at[index, 'Score'] = f'{result["Expected Home Goals"]}:{result["Expected Away Goals"]}'

    return 0


##### Updating the draws and standings tables for the first round (1-8 matches)

In [383]:
for i in range(8):
    update_tables(draws[i], standings)

In [384]:
## drawws updated
draws_updated = draws.copy()
draws_updated[3]

Unnamed: 0,Home team,Score,Away team
0,Slavia Prague,0:1,Arsenal
1,Napoli,1:1,Eintracht Frankfurt
2,Atlético Madrid,1:0,Union Saint-Gilloise
3,Bodø/Glimt,1:1,Monaco
4,Juventus,1:0,Sporting CP
5,Liverpool,1:1,Real Madrid
6,Olympiacos,1:1,PSV Eindhoven
7,Paris Saint-Germain,1:1,Bayern Munich
8,Tottenham Hotspur,1:1,Copenhagen
9,Pafos,0:3,Villarreal


In [385]:
## standings updated
standings_updated = standings.copy()
standings_updated

Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts
0,1,Ajax,8,2,6,0,8,4,4,12
1,2,Arsenal,8,4,4,0,8,5,3,16
2,3,Atalanta,8,3,3,2,8,6,2,12
3,4,Athletic Bilbao,8,0,7,1,7,7,0,7
4,5,Atlético Madrid,8,2,6,0,6,4,2,12
5,6,Barcelona,8,6,2,0,8,4,4,20
6,7,Bayer Leverkusen,8,0,6,2,8,8,0,6
7,8,Bayern Munich,8,5,3,0,10,3,7,18
8,9,Benfica,8,0,7,1,8,7,1,7
9,10,Bodø/Glimt,8,5,3,0,8,6,2,18


In [386]:
## Sorting the standings table by points, goal difference and goals for
standings_updated.sort_values(by=['Pts', 'GD', 'GF'], ascending=False, inplace=True)

standings_updated['Pos'] = range(1, len(standings_updated) + 1)
standings_updated

Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts
5,1,Barcelona,8,6,2,0,8,4,4,20
7,2,Bayern Munich,8,5,3,0,10,3,7,18
9,3,Bodø/Glimt,8,5,3,0,8,6,2,18
30,4,Real Madrid,8,5,3,0,8,6,2,18
27,5,Paris Saint-Germain,8,5,3,0,8,7,1,18
19,6,Liverpool,8,4,4,0,8,2,6,16
1,7,Arsenal,8,4,4,0,8,5,3,16
20,8,Manchester City,8,4,4,0,8,6,2,16
17,9,Juventus,8,3,5,0,10,5,5,14
16,10,Inter Milan,8,3,5,0,6,4,2,14


#### Saving tables to an html file

In [387]:
standings_updated.to_html('standings_updated.html', index=False)

pd.concat([draws_updated[i] for i in range(8)], ignore_index=True).to_html('matches_predictions.html', index=False)

### Table containing the draws for the first round