### THIS CODE WORKED

In [183]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_and_save_full_match(url):
    """Scrapes outfield stats, goalkeeper stats, and shots data for a match and saves CSVs automatically."""

    # Extract game_id from URL
    game_id = url.split("/")[5]

    # Fetch page
    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Get match date
    date_element = soup.select_one(".venuetime")
    match_date = date_element.text.strip() if date_element else "N/A"

    # Get full team names using 'title' attribute fallback
    scorebox = soup.select_one('.scorebox')
    teams = []
    if scorebox:
        team_divs = scorebox.find_all('div', recursive=False)[:2]  # Only left/right team blocks
        for div in team_divs:
            team_tag = div.find('a')
            if team_tag:
                team_name = team_tag.get('title') or team_tag.text
                teams.append(team_name.strip())

    if len(teams) != 2:
        raise Exception(f"Couldn't find two team names. Teams found: {teams}")

    home_team, away_team = teams

    # Get all tables
    all_tables = soup.find_all("table")
    summary_tables = [t for t in all_tables if "summary" in t.get("id", "") and "keeper" not in t.get("id", "")]
    keeper_tables = [t for t in all_tables if "keeper" in t.get("id", "")]
    shots_tables = [t for t in all_tables if "shots" in t.get("id", "")]

    outfield_rows = []
    goalkeeper_rows = []
    shots_rows = []

    # Outfield players (2 tables)
    for idx, table in enumerate(summary_tables):
        team = home_team if idx == 0 else away_team
        opponent = away_team if idx == 0 else home_team
        home_away = "Home" if idx == 0 else "Away"

        header_row = table.find("thead").find_all("tr")[-1]
        headers = [th.text.strip() for th in header_row.find_all("th")]

        for row in table.find("tbody").find_all("tr"):
            if row.get('class') and "thead" in row.get('class'):
                continue
            cells = [cell.text.strip() for cell in row.find_all(["th", "td"])]
            if len(cells) == len(headers):
                player_data = dict(zip(headers, cells))

                # ✨ Clean player name and create player_id
                player_name = player_data.get("Player", "").strip()
                jersey = player_data.get("#", "").strip()
                player_data["Player"] = player_name
                player_data["#"] = jersey
                player_data["player_id"] = f"{jersey}{player_name}"

                outfield_rows.append(player_data)

    # Goalkeepers (2 tables)
    for idx, table in enumerate(keeper_tables):
        team = home_team if idx == 0 else away_team
        opponent = away_team if idx == 0 else home_team
        home_away = "Home" if idx == 0 else "Away"

        header_row = table.find("thead").find_all("tr")[-1]
        headers = [th.text.strip() for th in header_row.find_all("th")]

        for row in table.find("tbody").find_all("tr"):
            if row.get('class') and "thead" in row.get('class'):
                continue
            cells = [cell.text.strip() for cell in row.find_all(["th", "td"])]
            if len(cells) == len(headers):
                player_data = dict(zip(headers, cells))

                goalkeeper_rows.append(player_data)

    # Shots (1 table shared)
    for table in shots_tables:
        header_row = table.find("thead").find_all("tr")[-1]
        headers = [th.text.strip() for th in header_row.find_all("th")]

        for row in table.find("tbody").find_all("tr"):
            cells = [cell.text.strip() for cell in row.find_all(["th", "td"])]
            if len(cells) == len(headers):
                shot_data = dict(zip(headers, cells))
                shot_data.update({
                    "game_id": game_id,
                    "match_date": match_date
                })
                shots_rows.append(shot_data)

    # Convert to DataFrames
    df_outfield = pd.DataFrame(outfield_rows)
    df_goalkeepers = pd.DataFrame(goalkeeper_rows)
    df_shots = pd.DataFrame(shots_rows)

    # Save to CSVs
    df_outfield.to_csv(f"TEST{game_id}_summary.csv", index=False)
    df_goalkeepers.to_csv(f"TEST{game_id}_goalkeepers.csv", index=False)
    df_shots.to_csv(f"TEST{game_id}_shots.csv", index=False)

    print(f"✅ Scraped and saved match {game_id}:")
    print(f"  - {game_id}_summary.csv")
    print(f"  - {game_id}_goalkeepers.csv")
    print(f"  - {game_id}_shots.csv")


scrape_and_save_full_match("https://fbref.com/en/matches/13e27d90/Portland-Thorns-FC-Orlando-Pride-May-3-2025-NWSL")

✅ Scraped and saved match 13e27d90:
  - 13e27d90_summary.csv
  - 13e27d90_goalkeepers.csv
  - 13e27d90_shots.csv


### PULL AND CLEAN SUMMARY AND GOALKEEPER TABLES

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_and_save_full_match(url):
    """Scrapes outfield stats, goalkeeper stats, and shots data for a match and saves CSVs automatically."""

    # Extract game_id from URL
    game_id = url.split("/")[5]

    # Fetch page
    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Get match date
    date_element = soup.select_one(".venuetime")
    match_date = date_element.text.strip() if date_element else "N/A"

    # Get full team names using 'title' attribute fallback
    scorebox = soup.select_one('.scorebox')
    teams = []
    if scorebox:
        team_divs = scorebox.find_all('div', recursive=False)[:2]
        for div in team_divs:
            team_tag = div.find('a')
            if team_tag:
                team_name = team_tag.get('title') or team_tag.text
                teams.append(team_name.strip())

    if len(teams) != 2:
        raise Exception(f"Couldn't find two team names. Teams found: {teams}")

    home_team, away_team = teams

    # Get all tables
    all_tables = soup.find_all("table")
    summary_tables = [t for t in all_tables if "summary" in t.get("id", "") and "keeper" not in t.get("id", "")]
    keeper_tables = [t for t in all_tables if "keeper" in t.get("id", "")]
    shots_tables = [t for t in all_tables if "shots" in t.get("id", "")]

    outfield_rows = []
    goalkeeper_rows = []
    shots_rows = []

    # OUTFIELD PLAYERS
    for idx, table in enumerate(summary_tables):
        team = home_team if idx == 0 else away_team
        opponent = away_team if idx == 0 else home_team
        home_away = "Home" if idx == 0 else "Away"

        header_row = table.find("thead").find_all("tr")[-1]
        headers = [th.text.strip() for th in header_row.find_all("th")]

        for row in table.find("tbody").find_all("tr"):
            if row.get('class') and "thead" in row.get('class'):
                continue
            cells = [cell.text.strip() for cell in row.find_all(["th", "td"])]
            if len(cells) == len(headers):
                player_data = dict(zip(headers, cells))

                player_name = player_data.get("Player", "").strip()
                jersey = player_data.get("#", "").strip()
                clean_name = player_name.replace(" ", "")
                player_id = f"{jersey}{clean_name}{game_id}"

                player_data.update({
                    "game_id": game_id,
                    "player_id": player_id
                })

                outfield_rows.append(player_data)

    df_outfield = pd.DataFrame(outfield_rows)

    # GOALKEEPERS
    for idx, table in enumerate(keeper_tables):
        team = home_team if idx == 0 else away_team
        opponent = away_team if idx == 0 else home_team
        home_away = "Home" if idx == 0 else "Away"
    
        header_row = table.find("thead").find_all("tr")[-1]
        headers = [th.text.strip() for th in header_row.find_all("th")]
    
        for row in table.find("tbody").find_all("tr"):
            if row.get('class') and "thead" in row.get('class'):
                continue
            cells = [cell.text.strip() for cell in row.find_all(["th", "td"])]
            if len(cells) == len(headers):
                player_data = dict(zip(headers, cells))
    
                player_name = player_data.get("Player", "").strip()
                jersey = player_data.get("#", "").strip()
                clean_name = player_name.replace(" ", "")
                player_id = f"{jersey}{clean_name}"
    
                # Fallback if jersey is missing — try to match player name in df_outfield
                if not jersey and player_name in df_outfield["Player"].values:
                    matched = df_outfield[df_outfield["Player"] == player_name]
                    if not matched.empty:
                        jersey = matched.iloc[0]["#"]
                        player_id = matched.iloc[0]["player_id"]
    
                player_data.update({
                    "#": jersey,
                    "game_id": game_id,
                    "player_id": player_id
                })
    
                goalkeeper_rows.append(player_data)

    # Save CSVs
    df_goalkeepers = pd.DataFrame(goalkeeper_rows)

    df_outfield.to_csv(f"{game_id}_summary.csv", index=False)
    df_goalkeepers.to_csv(f"{game_id}_goalkeepers.csv", index=False)

    print(f"✅ Scraped and saved match {game_id}:")
    print(f"  - {game_id}_summary.csv")
    print(f"  - {game_id}_goalkeepers.csv")

# Example use
scrape_and_save_full_match("https://fbref.com/en/matches/7958f078/Portland-Thorns-FC-Angel-City-FC-March-21-2025-NWSL")


✅ Scraped and saved match 7958f078:
  - 7958f078_summary.csv
  - 7958f078_goalkeepers.csv


### GAME SCRAPER

In [77]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from functools import reduce

def scrape_and_save_full_match_all_tables(url):
    """Scrapes full outfield player stats (all tables), goalkeepers, and shots, and saves 3 CSVs."""

    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    game_id = url.split("/")[5]

    # Match date
    date_element = soup.select_one(".venuetime")
    match_date = date_element.text.strip() if date_element else "N/A"

    # Team names
    scorebox = soup.select_one('.scorebox')
    teams = []
    if scorebox:
        team_divs = scorebox.find_all('div', recursive=False)[:2]
        for div in team_divs:
            team_tag = div.find('a')
            if team_tag:
                team_name = team_tag.get('title') or team_tag.text
                teams.append(team_name.strip())
    if len(teams) != 2:
        raise Exception("Could not find two team names.")
    home_team, away_team = teams

    # ---- OUTFIELD PLAYER STATS (All Tables) ---- #
    table_types = {
        "summary": "Summary",
        "passing": "Passing",
        "passing_types": "Pass Types",
        "defense": "Defensive",
        "possession": "Possession",
        "misc": "Miscellaneous"
    }

    all_dfs = []
    join_cols = ["Player", "match_date", "game_id", "team", "opponent", "home_away"]

    for key in table_types:
        tables = [t for t in soup.find_all("table") if key in t.get("id", "") and "keeper" not in t.get("id", "")]

        for idx, table in enumerate(tables):
            team = home_team if idx == 0 else away_team
            opponent = away_team if idx == 0 else home_team
            home_away = "Home" if idx == 0 else "Away"

            header_row = table.find("thead").find_all("tr")[-1]
            headers = [th.text.strip() for th in header_row.find_all("th")]

            rows = []
            for row in table.find("tbody").find_all("tr"):
                # Skip header rows inside body (multi-level headers)
                if row.get('class') and "thead" in row.get('class'):
                    continue
                # Skip empty rows
                if len(row.find_all(["th", "td"])) == 0:
                    continue
                cells = [cell.text.strip() for cell in row.find_all(["th", "td"])]
                if len(cells) != len(headers):
                    continue
                player_data = dict(zip(headers, cells))
                # Skip team summary rows
                if player_data.get("Player", "").lower() in ["team totals", ""]:
                    continue
                player_data.update({
                    "game_id": game_id,
                    "match_date": match_date,
                    "team": team,
                    "opponent": opponent,
                    "home_away": home_away
                })
                rows.append(player_data)

            if not rows:
                continue  # If no valid rows, skip

            df = pd.DataFrame(rows)

            # Rename all stat columns (not join columns)
            df = df.rename(columns={col: f"{key}_{col}" if col not in join_cols else col for col in df.columns})

            all_dfs.append(df)

    if all_dfs:
        # Merge safely with outer join
        df_outfield = reduce(lambda left, right: pd.merge(left, right, on=join_cols, how="outer"), all_dfs)
        df_outfield.to_csv(f"{game_id}_outfield_players.csv", index=False)
        print(f"✅ Saved {game_id}_outfield_players.csv")
    else:
        print(f"⚠️ No outfield player data found for {game_id}")
        df_outfield = pd.DataFrame()

    # ---- GOALKEEPER TABLES ---- #
    keeper_tables = [t for t in soup.find_all("table") if "keeper" in t.get("id", "")]
    goalkeeper_rows = []

    for idx, table in enumerate(keeper_tables):
        team = home_team if idx == 0 else away_team
        opponent = away_team if idx == 0 else home_team
        home_away = "Home" if idx == 0 else "Away"

        header_row = table.find("thead").find_all("tr")[-1]
        headers = [th.text.strip() for th in header_row.find_all("th")]

        for row in table.find("tbody").find_all("tr"):
            if row.get('class') and "thead" in row.get('class'):
                continue
            cells = [cell.text.strip() for cell in row.find_all(["th", "td"])]
            if len(cells) == len(headers):
                player_data = dict(zip(headers, cells))
                player_data.update({
                    "game_id": game_id,
                    "match_date": match_date,
                    "team": team,
                    "opponent": opponent,
                    "home_away": home_away
                })
                goalkeeper_rows.append(player_data)

    df_goalkeepers = pd.DataFrame(goalkeeper_rows)
    df_goalkeepers.to_csv(f"{game_id}_goalkeepers.csv", index=False)
    print(f"✅ Saved {game_id}_goalkeepers.csv")

    # ---- SHOTS TABLE ---- #
    shot_tables = [t for t in soup.find_all("table") if "shots" in t.get("id", "")]
    shots_rows = []

    for table in shot_tables:
        header_row = table.find("thead").find_all("tr")[-1]
        headers = [th.text.strip() for th in header_row.find_all("th")]

        for row in table.find("tbody").find_all("tr"):
            cells = [cell.text.strip() for cell in row.find_all(["th", "td"])]
            if len(cells) == len(headers):
                shot_data = dict(zip(headers, cells))
                shot_data.update({
                    "game_id": game_id,
                    "match_date": match_date
                })
                shots_rows.append(shot_data)

    df_shots = pd.DataFrame(shots_rows)
    df_shots.to_csv(f"{game_id}_shots.csv", index=False)
    print(f"✅ Saved {game_id}_shots.csv")


In [79]:
for url in match_urls:
    try:
        scrape_and_save_full_match_all_tables(url)
    except Exception as e:
        print(f"❌ Error scraping {url}: {e}")


❌ Error scraping https://fbref.com/en/matches/7239a666/Kansas-City-Current-Portland-Thorns-FC-March-15-2025-NWSL: Passing 'suffixes' which cause duplicate columns {'passing_Age_x', 'passing_Nation_x', 'passing_#_x', 'passing_Att_x', 'passing_Cmp_x', 'passing_Min_x', 'passing_Pos_x'} is not allowed.
❌ Error scraping https://fbref.com/en/matches/7958f078/Portland-Thorns-FC-Angel-City-FC-March-21-2025-NWSL: Passing 'suffixes' which cause duplicate columns {'passing_Age_x', 'passing_Nation_x', 'passing_#_x', 'passing_Att_x', 'passing_Cmp_x', 'passing_Min_x', 'passing_Pos_x'} is not allowed.
❌ Error scraping https://fbref.com/en/matches/475a847a/Portland-Thorns-FC-North-Carolina-Courage-March-29-2025-NWSL: Passing 'suffixes' which cause duplicate columns {'passing_Age_x', 'passing_Nation_x', 'passing_#_x', 'passing_Att_x', 'passing_Cmp_x', 'passing_Min_x', 'passing_Pos_x'} is not allowed.
❌ Error scraping https://fbref.com/en/matches/fb58cf7f/Utah-Royals-Portland-Thorns-FC-April-11-2025-NWS

In [91]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import os

def scrape_and_save_full_match_by_table(url):
    """Scrapes each player stat table separately and saves a CSV for each one."""

    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    game_id = url.split("/")[5]
    match_date = soup.select_one(".venuetime").text.strip() if soup.select_one(".venuetime") else "N/A"

    scorebox = soup.select_one('.scorebox')
    teams = []
    if scorebox:
        team_divs = scorebox.find_all('div', recursive=False)[:2]
        for div in team_divs:
            team_tag = div.find('a')
            if team_tag:
                team_name = team_tag.get('title') or team_tag.text
                teams.append(team_name.strip())
    if len(teams) != 2:
        raise Exception("Could not find two team names.")
    home_team, away_team = teams

    table_types = {
        "summary": "Summary",
        "passing": "Passing",
        "passing_types": "Pass Types",
        "defense": "Defensive",
        "possession": "Possession",
        "misc": "Miscellaneous"
    }

    join_cols = ["Player", "match_date", "game_id", "team", "opponent", "home_away"]

    def flatten_headers(table):
        thead = table.find("thead")
        rows = thead.find_all("tr")
        if len(rows) == 1:
            return [th.text.strip() for th in rows[0].find_all("th")]
        row1 = [th.text.strip() for th in rows[0].find_all("th")]
        row2 = [th.text.strip() for th in rows[1].find_all("th")]
        flat_headers = []
        last_label = ""
        for i in range(len(row2)):
            top = row1[i] if i < len(row1) and row1[i] != "" else last_label
            last_label = top
            bottom = row2[i] if i < len(row2) else ""
            if top == "":
                flat_headers.append(bottom)
            elif bottom == "":
                flat_headers.append(top)
            else:
                flat_headers.append(f"{top}_{bottom}")
        return flat_headers

    for key in table_types:
        tables = [t for t in soup.find_all("table") if key in t.get("id", "") and "keeper" not in t.get("id", "")]
        tables = tables[:2]  # Limit to 1 per team

        all_rows = []
        for idx, table in enumerate(tables):
            team = home_team if idx == 0 else away_team
            opponent = away_team if idx == 0 else home_team
            home_away = "Home" if idx == 0 else "Away"

            headers = flatten_headers(table)

            for row in table.find("tbody").find_all("tr"):
                if row.get('class') and "thead" in row.get('class'):
                    continue
                cells = [cell.text.strip() for cell in row.find_all(["th", "td"])]
                if len(cells) != len(headers):
                    continue
                player_data = dict(zip(headers, cells))
                if player_data.get("Player", "").lower() in ["team totals", ""]:
                    continue
                player_data.update({
                    "game_id": game_id,
                    "match_date": match_date,
                    "team": team,
                    "opponent": opponent,
                    "home_away": home_away
                })
                all_rows.append(player_data)

        if all_rows:
            df = pd.DataFrame(all_rows)
            df.to_csv(f"{game_id}_{key}.csv", index=False)
            print(f"✅ Saved {game_id}_{key}.csv")

    # GOALKEEPER TABLES
    keeper_tables = [t for t in soup.find_all("table") if "keeper" in t.get("id", "")]
    keeper_rows = []

    for idx, table in enumerate(keeper_tables):
        team = home_team if idx == 0 else away_team
        opponent = away_team if idx == 0 else home_team
        home_away = "Home" if idx == 0 else "Away"

        headers = flatten_headers(table)

        for row in table.find("tbody").find_all("tr"):
            if row.get('class') and "thead" in row.get('class'):
                continue
            cells = [cell.text.strip() for cell in row.find_all(["th", "td"])]
            if len(cells) == len(headers):
                player_data = dict(zip(headers, cells))
                player_data.update({
                    "game_id": game_id,
                    "match_date": match_date,
                    "team": team,
                    "opponent": opponent,
                    "home_away": home_away
                })
                keeper_rows.append(player_data)

    if keeper_rows:
        df_gk = pd.DataFrame(keeper_rows)
        df_gk.to_csv(f"{game_id}_goalkeepers.csv", index=False)
        print(f"✅ Saved {game_id}_goalkeepers.csv")

    # SHOTS TABLE
    shot_tables = [t for t in soup.find_all("table") if "shots" in t.get("id", "")]
    shots_rows = []

    for table in shot_tables:
        headers = flatten_headers(table)
        for row in table.find("tbody").find_all("tr"):
            cells = [cell.text.strip() for cell in row.find_all(["th", "td"])]
            if len(cells) == len(headers):
                shot_data = dict(zip(headers, cells))
                shot_data.update({
                    "game_id": game_id,
                    "match_date": match_date
                })
                shots_rows.append(shot_data)

    if shots_rows:
        df_shots = pd.DataFrame(shots_rows)
        df_shots.to_csv(f"{game_id}_shots.csv", index=False)
        print(f"✅ Saved {game_id}_shots.csv")
scrape_and_save_full_match_by_table("https://fbref.com/en/matches/1c087799/Portland-Thorns-FC-Racing-Louisville-April-27-2025-NWSL")


✅ Saved 1c087799_summary.csv
✅ Saved 1c087799_passing.csv
✅ Saved 1c087799_passing_types.csv
✅ Saved 1c087799_defense.csv
✅ Saved 1c087799_possession.csv
✅ Saved 1c087799_misc.csv
✅ Saved 1c087799_goalkeepers.csv
✅ Saved 1c087799_shots.csv


### META DATA SCRAPER

In [56]:
def scrape_match_metadata(url):
    """Scrape metadata from a single match report page."""
    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    game_id = url.split("/")[5]

    date_element = soup.select_one(".venuetime")
    match_date = date_element.text.strip() if date_element else "N/A"

    scorebox = soup.select_one('.scorebox')
    if not scorebox:
        raise Exception("Scorebox not found.")

    team_divs = scorebox.find_all('div', recursive=False)[:2]
    if len(team_divs) != 2:
        raise Exception("Expected 2 team divs.")

    teams = []
    scores = []
    for div in team_divs:
        team_tag = div.find('a')
        score_tag = div.find('div', class_='score')
        if team_tag and score_tag:
            team_name = team_tag.get('title') or team_tag.text
            teams.append(team_name.strip())
            scores.append(score_tag.text.strip())

    if len(teams) != 2 or len(scores) != 2:
        raise Exception("Could not extract both teams and scores.")

    return {
        "game_id": game_id,
        "match_date": match_date,
        "home_team": teams[0],
        "away_team": teams[1],
        "home_score": int(scores[0]),
        "away_score": int(scores[1])
    }


In [170]:
import requests
from bs4 import BeautifulSoup, Comment
import pandas as pd

def scrape_match_metadata(match_urls):
    all_matches = []

    for url in match_urls:
        try:
            response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})
            response.raise_for_status()
        except Exception as e:
            print(f"❌ Failed to fetch {url}: {e}")
            continue

        soup = BeautifulSoup(response.text, 'html.parser')
        game_id = url.split("/")[-2]

        # Match date
        date_tag = soup.select_one(".venuetime")
        match_date = date_tag.text.strip() if date_tag else None

        # Teams
        scorebox = soup.select_one(".scorebox")
        teams = scorebox.find_all("a", href=True) if scorebox else []
        team_names = [t.get('title') or t.text.strip() for t in teams if "/en/squads/" in t['href']]
        if len(team_names) != 2:
            print(f"⚠️ Could not determine both teams for {url}")
            continue
        home_team, away_team = team_names

        # Search for commented-out HTML that contains team stats
        comments = soup.find_all(string=lambda text: isinstance(text, Comment))
        team_stats_table = None
        for comment in comments:
            if "team_stats" in comment:
                comment_soup = BeautifulSoup(comment, 'html.parser')
                table = comment_soup.find("table", id="team_stats")
                if table:
                    team_stats_table = table
                    break

        if not team_stats_table:
            print(f"⚠️ No team stats table found for {url}")
            continue

        # Extract stat rows
        stats = {
            "game_id": game_id,
            "match_date": match_date,
            "home_team": home_team,
            "away_team": away_team
        }

        rows = team_stats_table.find_all("tr")
        for row in rows:
            cells = row.find_all("td")
            if len(cells) == 3:
                label = cells[0].text.strip().lower().replace(" ", "_")
                stats[f"home_{label}"] = cells[1].text.strip()
                stats[f"away_{label}"] = cells[2].text.strip()

        all_matches.append(stats)

    if all_matches:
        df = pd.DataFrame(all_matches)
        df.to_csv("match_data.csv", index=False)
        print("✅ Saved match_data.csv")
    else:
        print("⚠️ No match data collected")


In [174]:
match_urls = [
    # "https://fbref.com/en/matches/7239a666/Kansas-City-Current-Portland-Thorns-FC-March-15-2025-NWSL",
    # "https://fbref.com/en/matches/7958f078/Portland-Thorns-FC-Angel-City-FC-March-21-2025-NWSL",
    # "https://fbref.com/en/matches/475a847a/Portland-Thorns-FC-North-Carolina-Courage-March-29-2025-NWSL",
    # "https://fbref.com/en/matches/fb58cf7f/Utah-Royals-Portland-Thorns-FC-April-11-2025-NWSL",
    # "https://fbref.com/en/matches/71e1c7c8/Seattle-Reign-FC-Portland-Thorns-FC-April-18-2025-NWSL",
    # "https://fbref.com/en/matches/414d2972/Portland-Thorns-FC-Gotham-FC-April-22-2025-NWSL",
    # "https://fbref.com/en/matches/1c087799/Portland-Thorns-FC-Racing-Louisville-April-27-2025-NWSL",
    "https://fbref.com/en/matches/13e27d90/Portland-Thorns-FC-Orlando-Pride-May-3-2025-NWSL"
    # Add all your match report URLs here
]

scrape_match_metadata(match_urls)

# # Collect metadata
# match_metadata = []

# for url in match_urls:
#     try:
#         data = scrape_match_metadata(url)
#         match_metadata.append(data)
#     except Exception as e:
#         print(f"❌ Failed to scrape {url}: {e}")

# # Convert to DataFrame
# df_metadata = pd.DataFrame(match_metadata)

# # Save to CSV
# df_metadata.to_csv("thorns_2025_match_metadata.csv", index=False)
# print("✅ Saved match metadata to thorns_2025_match_metadata.csv")


⚠️ No team stats table found for https://fbref.com/en/matches/13e27d90/Portland-Thorns-FC-Orlando-Pride-May-3-2025-NWSL
⚠️ No match data collected
