In [None]:
import os
import warnings

import pandas as pd
import requests

# The API token is read from the environment so the credential never ends up
# in the saved notebook or in version control. Export FOOTBALL_DATA_API_TOKEN
# before starting the kernel. Any token that was previously hard-coded in this
# cell should be treated as leaked and regenerated.
API_TOKEN = os.environ.get("FOOTBALL_DATA_API_TOKEN", "")
if not API_TOKEN:
    # Warn instead of raising so the cell itself still runs on a fresh kernel;
    # the problem is surfaced here rather than only as a failed request later.
    warnings.warn(
        "FOOTBALL_DATA_API_TOKEN is not set; requests to api.football-data.org "
        "will carry an empty X-Auth-Token and are likely to be rejected."
    )

# Header dict passed to every requests call made against the API.
headers = {"X-Auth-Token": API_TOKEN}

def fetch_matches(season_year, timeout=30):
    """Download one Premier League season from football-data.org as a DataFrame.

    Parameters
    ----------
    season_year : int
        Sent as the ``season`` query parameter of the ``PL/matches`` endpoint
        (e.g. ``2023``).
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.get``.
        Without it a stalled connection would block the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        One row per entry of the response's ``"matches"`` list, with three
        columns rewritten:

        * ``homeTeam`` / ``awayTeam`` - reduced from the nested team object
          to its ``"name"`` value;
        * ``score`` - reduced from the nested score object to its
          ``"winner"`` value (later cells compare it against ``"HOME_TEAM"``,
          ``"AWAY_TEAM"`` and ``"DRAW"``);
        * ``season`` - set to ``season_year`` for every row, replacing any
          ``season`` field the API returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status (bad token, rate limit, ...).
    requests.Timeout
        If no response arrives within ``timeout`` seconds.
    KeyError
        If the JSON payload has no ``"matches"`` key or a match lacks one of
        the fields read below.
    """
    url = "https://api.football-data.org/v4/competitions/PL/matches"
    response = requests.get(
        url,
        headers=headers,
        params={"season": season_year},  # let requests build/encode the query string
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of hitting an opaque KeyError below.
    response.raise_for_status()
    data = response.json()

    matches = pd.DataFrame(data["matches"])

    # Flatten the nested objects that the analysis cells group and filter on.
    matches["homeTeam"] = matches["homeTeam"].apply(lambda x: x["name"])
    matches["awayTeam"] = matches["awayTeam"].apply(lambda x: x["name"])
    matches["score"] = matches["score"].apply(lambda x: x["winner"])
    matches["season"] = season_year

    return matches

# Download each season in turn (2023 first, then 2024) and stack the two
# frames into one, renumbering the rows 0..n-1.
matches_2023, matches_2024 = (fetch_matches(year) for year in (2023, 2024))
all_matches = pd.concat(
    [matches_2023, matches_2024],
    ignore_index=True,
)

# Sanity check: first few rows, then the number of rows per season.
season_counts = all_matches["season"].value_counts
print(all_matches.head())
print(season_counts())

                                                area  \
0  {'id': 2072, 'name': 'England', 'code': 'ENG',...   
1  {'id': 2072, 'name': 'England', 'code': 'ENG',...   
2  {'id': 2072, 'name': 'England', 'code': 'ENG',...   
3  {'id': 2072, 'name': 'England', 'code': 'ENG',...   
4  {'id': 2072, 'name': 'England', 'code': 'ENG',...   

                                         competition  season      id  \
0  {'id': 2021, 'name': 'Premier League', 'code':...    2023  435943   
1  {'id': 2021, 'name': 'Premier League', 'code':...    2023  435944   
2  {'id': 2021, 'name': 'Premier League', 'code':...    2023  435945   
3  {'id': 2021, 'name': 'Premier League', 'code':...    2023  435946   
4  {'id': 2021, 'name': 'Premier League', 'code':...    2023  435947   

                utcDate    status  matchday           stage group  \
0  2023-08-11T19:00:00Z  FINISHED         1  REGULAR_SEASON  None   
1  2023-08-12T12:00:00Z  FINISHED         1  REGULAR_SEASON  None   
2  2023-08-12T14:00:00Z

In [6]:
# Victories per team, split by venue. The "score" column holds the winner
# label, so a row counts for the home side when it reads "HOME_TEAM" and for
# the away side when it reads "AWAY_TEAM".
home_wins = (
    all_matches.loc[all_matches["score"].eq("HOME_TEAM")]
    .groupby("homeTeam")
    .size()
)
away_wins = (
    all_matches.loc[all_matches["score"].eq("AWAY_TEAM")]
    .groupby("awayTeam")
    .size()
)

# Combine both venues (a team missing on one side contributes 0 there),
# cast back to whole numbers and rank from most to fewest wins.
total_wins = (
    home_wins.add(away_wins, fill_value=0)
    .astype(int)
    .sort_values(ascending=False)
)

# Ten teams with the most wins
print(total_wins.head(10))

homeTeam
Manchester City FC           49
Liverpool FC                 49
Arsenal FC                   48
Aston Villa FC               39
Newcastle United FC          38
Chelsea FC                   38
Tottenham Hotspur FC         31
Manchester United FC         29
Nottingham Forest FC         28
Brighton & Hove Albion FC    28
dtype: int64


In [7]:
# Drawn matches, credited to both the home and the away side.
draws = all_matches.loc[all_matches["score"].eq("DRAW")]
home_draws = draws.groupby("homeTeam").size()
away_draws = draws.groupby("awayTeam").size()
total_draws = home_draws.add(away_draws, fill_value=0)

# Appearances per team: home fixtures plus away fixtures.
home_matches = all_matches.groupby("homeTeam").size()
away_matches = all_matches.groupby("awayTeam").size()
total_matches = home_matches.add(away_matches, fill_value=0)

# One row per team; a team absent from a column gets 0, and the table is
# ordered by number of wins, highest first.
team_stats = (
    pd.DataFrame(
        {
            "Wins": total_wins,
            "Draws": total_draws,
            "Matches Played": total_matches,
        }
    )
    .fillna(0)
    .astype(int)
    .sort_values("Wins", ascending=False)
)

print(team_stats.head(10))

                           Wins  Draws  Matches Played
homeTeam                                              
Manchester City FC           49     15              76
Liverpool FC                 49     19              76
Arsenal FC                   48     19              76
Aston Villa FC               39     17              76
Newcastle United FC          38     12              76
Chelsea FC                   38     18              76
Tottenham Hotspur FC         31     11              76
Manchester United FC         29     15              76
Nottingham Forest FC         28     17              76
Brighton & Hove Albion FC    28     25              76


In [8]:
# League points: three for every win plus one for every draw. Teams that
# appear in only one of the two series are treated as having 0 in the other.
total_points = (
    total_wins.mul(3)
    .add(total_draws, fill_value=0)
    .astype(int)
    .sort_values(ascending=False)
)

print(total_points.head(10))

homeTeam
Liverpool FC                 166
Arsenal FC                   163
Manchester City FC           162
Aston Villa FC               134
Chelsea FC                   132
Newcastle United FC          126
Brighton & Hove Albion FC    109
AFC Bournemouth              104
Tottenham Hotspur FC         104
Crystal Palace FC            102
dtype: int64


In [9]:
# Matches per team: each fixture contributes one home and one away entry.
match_counts = pd.concat(
    [all_matches["homeTeam"], all_matches["awayTeam"]]
).value_counts()

# Upper bound on points: three for every match a team played.
max_points = match_counts.mul(3)

# Share of attainable points, further divided by the constant 20.
# NOTE(review): the 20 is presumably the number of clubs in a season, but the
# resulting values do not sum to 1 across teams, so this is not a probability
# distribution - confirm the intended metric before relying on it.
probability_win = total_points.div(max_points.mul(20)).sort_values(ascending=False)
print(probability_win)

Liverpool FC                  0.036404
Arsenal FC                    0.035746
Manchester City FC            0.035526
Aston Villa FC                0.029386
Chelsea FC                    0.028947
Newcastle United FC           0.027632
Brighton & Hove Albion FC     0.023904
AFC Bournemouth               0.022807
Tottenham Hotspur FC          0.022807
Crystal Palace FC             0.022368
Manchester United FC          0.022368
Nottingham Forest FC          0.022149
Fulham FC                     0.022149
Everton FC                    0.021053
Brentford FC                  0.020833
West Ham United FC            0.020833
Wolverhampton Wanderers FC    0.019298
Luton Town FC                 0.011404
Leicester City FC             0.010965
Burnley FC                    0.010526
Ipswich Town FC               0.009649
Sheffield United FC           0.007018
Southampton FC                0.005263
dtype: float64
