In [27]:
# Install required packages (run once)
# !pip install requests pandas

import requests
import pandas as pd

# MLB team IDs for the White Sox and Cubs
SOX_ID = 145
CUBS_ID = 112

def fetch_series_stats(start_date: str, end_date: str) -> pd.DataFrame:
    """
    Fetch per-player batting stats for all White Sox vs. Cubs games
    between start_date and end_date (inclusive).
    """
    # 1) Pull the schedule
    sched = requests.get(
        "https://statsapi.mlb.com/api/v1/schedule",
        params={"sportId": 1, "startDate": start_date, "endDate": end_date}
    ).json()

    # 2) Identify head-to-head gamePks
    game_pks = []
    game_dates = {}
    for day in sched.get("dates", []):
        date_str = day.get("date")
        for g in day.get("games", []):
            ids = {
                g["teams"]["away"]["team"]["id"],
                g["teams"]["home"]["team"]["id"]
            }
            if ids == {SOX_ID, CUBS_ID}:
                gp = g["gamePk"]
                game_pks.append(gp)
                game_dates[gp] = date_str

    if not game_pks:
        print("No White Sox vs. Cubs games found in that range.")
        return pd.DataFrame(
            columns=["Player","Team","PA","AB","R","H","TB","2B","3B","HR","RBI","BB","SO","SB","Pos", "HBP", "SH", "SF","GIDP"]
        )

    # 3) Fetch each boxscore and extract batting via the 'stats' field
    records = []
    for gp in game_pks:
        box = requests.get(f"https://statsapi.mlb.com/api/v1/game/{gp}/boxscore").json()
        date = game_dates[gp]
        for side in ("away", "home"):
            team = box["teams"][side]["team"]["name"]
            for pdata in box["teams"][side]["players"].values():
                stat_block = pdata.get("stats", {})
                bat = stat_block.get("batting")
                if not bat:
                    continue
                records.append({
                    "Date":   date,
                    "Team":   team,
                    "Player": pdata["person"]["fullName"],
                    "PA":     bat.get("plateAppearances"),
                    "AB":     bat.get("atBats"),
                    "R":      bat.get("runs"),
                    "H":      bat.get("hits"),
                    "TB":     bat.get("totalBases"),
                    "2B":     bat.get("doubles"),
                    "3B":     bat.get("triples"),
                    "HR":     bat.get("homeRuns"),
                    "RBI":    bat.get("rbi"),
                    "BB":     bat.get("baseOnBalls"),
                    "SO":     bat.get("strikeOuts"),
                    "SB":     bat.get("stolenBases"),
                })

    # 4) Build DataFrame and aggregate
    cols = ["Date","Team","Player","PA","AB","R","H","TB","2B","3B","HR","RBI","BB","SO","SB","Pos", "HBP", "SH", "SF","GIDP"]
    df = pd.DataFrame(records, columns=cols)
    series_df = df.groupby(["Player","Team"], as_index=False).sum(numeric_only=True)
    return series_df

# Execute in your Jupyter notebook
series_df = fetch_series_stats("2025-05-16", "2025-05-18")
series_df  # renders the table inline

# To save:
# series_df.to_csv("white_sox_vs_cubs_May16-18_2025.csv", index=False)


Unnamed: 0,Player,Team,PA,AB,R,H,TB,2B,3B,HR,RBI,BB,SO,SB,Pos,HBP,SH,SF,GIDP
0,Andrew Vaughn,Chicago White Sox,10,10,0,1,1,0,0,0,0,0,2,0,0.0,0.0,0.0,0.0,0.0
1,Brooks Baldwin,Chicago White Sox,9,8,0,1,1,0,0,0,0,1,2,0,0.0,0.0,0.0,0.0,0.0
2,Carson Kelly,Chicago Cubs,9,8,2,2,3,1,0,0,0,1,3,0,0.0,0.0,0.0,0.0,0.0
3,Chase Meidroth,Chicago White Sox,12,12,2,6,10,1,0,1,1,0,0,2,0.0,0.0,0.0,0.0,0.0
4,Dansby Swanson,Chicago Cubs,14,11,4,4,7,0,0,1,2,2,2,3,0.0,0.0,0.0,0.0,0.0
5,Edgar Quero,Chicago White Sox,12,11,0,2,2,0,0,0,1,1,1,0,0.0,0.0,0.0,0.0,0.0
6,Jon Berti,Chicago Cubs,3,3,0,0,0,0,0,0,0,0,3,0,0.0,0.0,0.0,0.0,0.0
7,Josh Rojas,Chicago White Sox,7,7,0,2,2,0,0,0,0,0,2,0,0.0,0.0,0.0,0.0,0.0
8,Joshua Palacios,Chicago White Sox,9,9,0,1,1,0,0,0,0,0,2,0,0.0,0.0,0.0,0.0,0.0
9,Justin Turner,Chicago Cubs,4,3,0,0,0,0,0,0,0,1,1,0,0.0,0.0,0.0,0.0,0.0


In [39]:
cubs_df= series_df[series_df["Team"] == "Chicago Cubs"]
whitesox_df= series_df[series_df["Team"] == "Chicago White Sox"]

In [29]:
import os

downloads_dir = os.path.join(os.path.expanduser("~"), "Downloads")
output_path = os.path.join(downloads_dir, "chicago_cubs_May16-18_2025.csv")
cubs_df.to_csv(output_path, index=False)

print(f"Cubs stats saved to: {output_path}")


Cubs stats saved to: C:\Users\pgper\Downloads\chicago_cubs_May16-18_2025.csv


In [40]:
downloads_dir = os.path.join(os.path.expanduser("~"), "Downloads")
output_path = os.path.join(downloads_dir, "white_sox_May16-18_2025.csv")

whitesox_df.to_csv(output_path, index=False)

print(f"White Sox stats saved to: {output_path}")


White Sox stats saved to: C:\Users\pgper\Downloads\white_sox_May16-18_2025.csv
