In [None]:
import requests
import pandas as pd
import time

In [None]:
import requests
import pandas as pd
import time

# Root URL every request below is built on (TheSportsDB v1 JSON API).
# NOTE(review): the trailing "3" is presumably the shared free API key - confirm.
BASE_URL = "https://www.thesportsdb.com/api/v1/json/3"
# League id sent as the `id` query parameter; the captured output labels
# its matches 'Spanish La Liga'.
LEAGUE_ID = "4335"

def get_seasons(league_id):
    """Return the season labels the API lists for a league.

    Args:
        league_id: League identifier, interpolated into the ``id`` query
            parameter of ``search_all_seasons.php``.

    Returns:
        A list of ``strSeason`` values in the order the API returns them.
        An empty list is returned (after printing the reason) when the
        request fails, the body is not JSON, or no seasons are listed.
    """
    url = f"{BASE_URL}/search_all_seasons.php?id={league_id}"
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to fetch seasons: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch seasons: {response.status_code}")
        return []

    try:
        payload = response.json()
    except ValueError as exc:
        print(f"Failed to fetch seasons: {exc}")
        return []

    # The key may be present with a null value, which `.get(key, [])`
    # would pass through as None; `or []` covers both cases.
    seasons = payload.get("seasons") or []
    return [season["strSeason"] for season in seasons]

def get_round_matches(league_id, round_number, season):
    """Fetch the events of one round of a league season.

    Args:
        league_id: League identifier (``id`` query parameter).
        round_number: Round to fetch (``r`` query parameter).
        season: Season label (``s`` query parameter).

    Returns:
        The non-empty list of event dicts from the ``events`` field, or
        ``None`` when the request fails, the status is not 200, the body
        is not JSON, or the round has no events.
    """
    url = f"{BASE_URL}/eventsround.php?id={league_id}&r={round_number}&s={season}"
    try:
        # Without a timeout a stalled connection would block the whole scrape.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to fetch data: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data: {response.status_code}")
        return None

    try:
        data = response.json()
    except ValueError as exc:
        # Error pages are not always JSON; treat them like any other failure.
        print(f"Failed to fetch data: {exc}")
        return None

    # Both a null and an empty "events" field mean "nothing for this round".
    return data.get("events") or None

def calculate_last_5_goals(team_matches, team):
    """Sum the goals `team` scored in its five highest-round scored matches.

    Args:
        team_matches: Iterable of match dicts carrying ``intRound``,
            ``strHomeTeam``, ``strAwayTeam``, ``intHomeScore`` and
            ``intAwayScore``.
        team: Team name compared against the home/away name fields.

    Returns:
        Total goals as an int; 0 when no scored match involves the team.
    """
    def has_result(game):
        return not (game.get("intHomeScore") is None or game.get("intAwayScore") is None)

    def goals_for(game):
        if game["strHomeTeam"] == team:
            return int(game["intHomeScore"])
        if game["strAwayTeam"] == team:
            return int(game["intAwayScore"])
        # Match does not involve the team: contributes nothing.
        return 0

    scored_games = list(filter(has_result, team_matches))
    # Rounds arrive as strings, so order numerically rather than lexically.
    scored_games.sort(key=lambda game: int(game["intRound"]))
    return sum(map(goals_for, scored_games[-5:]))

def calculate_team_stats(team_matches, team):
    """Aggregate goals and win rate for `team` over its scored matches.

    Args:
        team_matches: Iterable of match dicts with ``strHomeTeam``,
            ``strAwayTeam``, ``intHomeScore`` and ``intAwayScore``.
        team: Team name compared against the home/away name fields.

    Returns:
        A tuple ``(scored, conceded, goal_diff, win_rate)``. ``win_rate``
        is wins divided by the number of matches that have both scores
        set, or 0 when there are none.
    """
    finished = [
        game
        for game in team_matches
        if not (game.get("intHomeScore") is None or game.get("intAwayScore") is None)
    ]

    goals_for = 0
    goals_against = 0
    victories = 0
    for game in finished:
        home_goals, away_goals = int(game["intHomeScore"]), int(game["intAwayScore"])
        if game["strHomeTeam"] == team:
            ours, theirs = home_goals, away_goals
        elif game["strAwayTeam"] == team:
            ours, theirs = away_goals, home_goals
        else:
            # Match does not involve the team; it still counts in the divisor below.
            continue
        goals_for += ours
        goals_against += theirs
        if ours > theirs:
            victories += 1

    played = len(finished)
    if played:
        rate = victories / played
    else:
        rate = 0
    return goals_for, goals_against, goals_for - goals_against, rate

def update_team_stats_post_round(matches, team_stats_tracker):
    """Fold a round's results into the per-team season tracker, in place.

    Args:
        matches: Iterable of match dicts with ``strHomeTeam``,
            ``strAwayTeam``, ``intHomeScore`` and ``intAwayScore``.
        team_stats_tracker: Dict mapping team name to its stats dict
            (``pts``, ``scored``, ``conceded``, ``GD`` and the
            home/away win/draw/loss counters). Mutated in place.

    Returns:
        None.
    """
    for fixture in matches:
        home_name = fixture["strHomeTeam"]
        away_name = fixture["strAwayTeam"]
        home_goals = int(fixture["intHomeScore"])
        away_goals = int(fixture["intAwayScore"])

        home_row = team_stats_tracker[home_name]
        home_row["scored"] += home_goals
        home_row["conceded"] += away_goals
        away_row = team_stats_tracker[away_name]
        away_row["scored"] += away_goals
        away_row["conceded"] += home_goals

        # 3 points for a win, 1 each for a draw.
        if home_goals == away_goals:
            home_row["pts"] += 1
            away_row["pts"] += 1
            home_row["homeDraws"] += 1
            away_row["awayDraws"] += 1
        elif home_goals > away_goals:
            home_row["pts"] += 3
            home_row["homeWins"] += 1
            away_row["awayLosses"] += 1
        else:
            away_row["pts"] += 3
            away_row["awayWins"] += 1
            home_row["homeLosses"] += 1

        # Goal difference is derived from the running totals.
        home_row["GD"] = home_row["scored"] - home_row["conceded"]
        away_row["GD"] = away_row["scored"] - away_row["conceded"]

def calculate_rankings(team_stats_tracker):
    """Write each team's 1-based league position into its ``rank`` field.

    Teams are ordered by points, then goal difference, then goals scored,
    all descending. Teams equal on all three keep their order in the
    tracker.

    Args:
        team_stats_tracker: Dict mapping team name to a stats dict with
            ``pts``, ``GD`` and ``scored``. Mutated in place.

    Returns:
        None.
    """
    def standing_key(name):
        stats = team_stats_tracker[name]
        return stats["pts"], stats["GD"], stats["scored"]

    ordered_names = sorted(team_stats_tracker, key=standing_key, reverse=True)
    for position, name in enumerate(ordered_names, start=1):
        team_stats_tracker[name]["rank"] = position

def _new_team_stats():
    """Return a zeroed tracker entry; rank stays -1 until rankings are computed."""
    return {
        "pts": 0, "scored": 0, "conceded": 0, "GD": 0, "rank": -1,
        "homeWins": 0, "homeDraws": 0, "homeLosses": 0,
        "awayWins": 0, "awayDraws": 0, "awayLosses": 0,
    }


def _is_missing(value):
    """Tell whether an API field is absent (None or an empty string)."""
    return value is None or value == ""


def process_season(league_id, season):
    """Build one feature row per played match of a league season.

    Every row describes a match together with both teams' form *before*
    that round: cumulative goals, goal difference, win rate, goals in the
    last five matches, league position and home/away records. Results of
    a round are folded into the running state only after all of that
    round's rows have been built.

    Args:
        league_id: League identifier passed to ``get_round_matches``.
        season: Season label passed to ``get_round_matches`` and stored
            in every row as ``strSeason``.

    Returns:
        A ``pandas.DataFrame`` with one row per match that has both team
        names and both scores; empty when no teams could be discovered.
    """
    print(f"Processing season: {season}")

    # Rounds 1 and 2 are used to discover the teams. Their payloads are
    # kept so the main loop does not request the same rounds again.
    prefetched = {}
    teams = set()
    for round_number in [1, 2]:
        matches = get_round_matches(league_id, round_number, season)
        if matches is None:
            break
        prefetched[round_number] = matches
        for match in matches:
            teams.add(match.get("strHomeTeam"))
            teams.add(match.get("strAwayTeam"))

    teams = [t for t in teams if t]
    print(f"Teams in season: {len(teams)}")

    # Double round-robin: each team meets every other team twice.
    num_rounds = (len(teams) - 1) * 2

    team_matches = {team: [] for team in teams}
    team_stats_tracker = {team: _new_team_stats() for team in teams}

    data = []

    for round_number in range(1, num_rounds + 1):
        matches = prefetched.pop(round_number, None)
        if matches is None:
            matches = get_round_matches(league_id, round_number, season)
            if matches is None:
                break
            # Pause only after a real request, to stay gentle on the API.
            time.sleep(0.5)

        valid_matches = []
        for match in matches:
            home_team = match.get("strHomeTeam")
            away_team = match.get("strAwayTeam")
            int_home_score = match.get("intHomeScore")
            int_away_score = match.get("intAwayScore")

            if not home_team or not away_team:
                continue
            # Explicit absence test: a truthiness check would also discard
            # a legitimate integer score of 0.
            if _is_missing(int_home_score) or _is_missing(int_away_score):
                continue

            # A team that did not appear in rounds 1-2 is registered on
            # first sight instead of having all of its matches rejected.
            for name in (home_team, away_team):
                team_matches.setdefault(name, [])
                team_stats_tracker.setdefault(name, _new_team_stats())

            try:
                # Fall back to the requested round when the field is null
                # or missing.
                int_round = int(match.get("intRound") or round_number)

                home_last5 = calculate_last_5_goals(team_matches[home_team], home_team)
                away_last5 = calculate_last_5_goals(team_matches[away_team], away_team)

                home_scored, home_conceded, home_gd, home_win_rate = calculate_team_stats(team_matches[home_team], home_team)
                away_scored, away_conceded, away_gd, away_win_rate = calculate_team_stats(team_matches[away_team], away_team)
            except (KeyError, TypeError, ValueError) as e:
                print(f"Error processing match: {match}")
                print(f"Error message: {e}")
                continue

            home_stats = team_stats_tracker[home_team]
            away_stats = team_stats_tracker[away_team]
            row = {
                "strLeague": match.get("strLeague"),
                "strSeason": season,
                "strHomeTeam": home_team,
                "strAwayTeam": away_team,
                "intHomeScore": int_home_score,
                "intAwayScore": int_away_score,
                "intRound": int_round,
                "homeScored": home_scored,
                "homeConceded": home_conceded,
                "homeGD": home_gd,
                "homeWinRate": home_win_rate,
                "awayScored": away_scored,
                "awayConceded": away_conceded,
                "awayGD": away_gd,
                "awayWinRate": away_win_rate,
                "homeClassement": home_stats["rank"],
                "awayClassement": away_stats["rank"],
                "lastHome5GamesScore": home_last5,
                "lastAway5GamesScore": away_last5,
                "homeWins": home_stats["homeWins"],
                "homeDraws": home_stats["homeDraws"],
                "homeLosses": home_stats["homeLosses"],
                "awayWins": away_stats["awayWins"],
                "awayDraws": away_stats["awayDraws"],
                "awayLosses": away_stats["awayLosses"],
            }

            data.append(row)
            print(row)
            # Store the normalised round so later "last 5" sorting never
            # trips over a null/missing intRound on this match.
            valid_matches.append({**match, "intRound": int_round})

        # Only now do this round's results become part of each team's history.
        for match in valid_matches:
            team_matches[match["strHomeTeam"]].append(match)
            team_matches[match["strAwayTeam"]].append(match)

        update_team_stats_post_round(valid_matches, team_stats_tracker)
        calculate_rankings(team_stats_tracker)

    return pd.DataFrame(data)

def main():
    """Scrape every season of ``LEAGUE_ID`` and write the rows to one CSV.

    Seasons are taken from ``get_seasons`` and processed in that order
    with ``process_season``. The combined table is written to
    ``liga_data_with_home_away_stats.csv`` in the working directory.
    """
    seasons = get_seasons(LEAGUE_ID)

    frames = []
    for season in seasons:
        season_data = process_season(LEAGUE_ID, season)
        # Seasons without usable fixtures come back as empty frames.
        if season_data is None or season_data.empty:
            continue
        frames.append(season_data)

    # Concatenate once: growing a DataFrame inside the loop re-copies all
    # previously collected rows on every iteration.
    all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    all_data.to_csv("liga_data_with_home_away_stats.csv", index=False)
    print("Data saved to liga_data_with_home_away_stats.csv")

# Run the full scrape only when this file is executed directly, not when
# it is imported as a module.
if __name__ == "__main__":
    main()


Processing season: 2002-2003
Teams in season: 0
Processing season: 2003-2004
Teams in season: 0
Processing season: 2004-2005
Teams in season: 0
Processing season: 2007-2008
Teams in season: 0
Processing season: 2008-2009
Teams in season: 0
Processing season: 2009-2010
Teams in season: 0
Processing season: 2010-2011
Teams in season: 0
Processing season: 2011-2012
Teams in season: 0
Processing season: 2012-2013
Teams in season: 20
{'strLeague': 'Spanish La Liga', 'strSeason': '2012-2013', 'strHomeTeam': 'Celta Vigo', 'strAwayTeam': 'Malaga', 'intHomeScore': '0', 'intAwayScore': '1', 'intRound': 1, 'homeScored': 0, 'homeConceded': 0, 'homeGD': 0, 'homeWinRate': 0, 'awayScored': 0, 'awayConceded': 0, 'awayGD': 0, 'awayWinRate': 0, 'homeClassement': -1, 'awayClassement': -1, 'lastHome5GamesScore': 0, 'lastAway5GamesScore': 0, 'homeWins': 0, 'homeDraws': 0, 'homeLosses': 0, 'awayWins': 0, 'awayDraws': 0, 'awayLosses': 0}
{'strLeague': 'Spanish La Liga', 'strSeason': '2012-2013', 'strHomeTeam