In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from statsbombpy import sb
from mplsoccer import Pitch
import numpy as np
from matplotlib.ticker import FuncFormatter

# -----------------------------
# SETTINGS
# -----------------------------
team = "Leicester City"
player_name = "N''Golo Kanté"   # StatsBomb spelling
competition_id = 2              # EPL
season_id = 27                  # 2015/16
out_dir = "kante_maps"
os.makedirs(out_dir, exist_ok=True)

# Event groups (edit if you want)
DEFENSIVE_TYPES = ["Pressure", "Ball Recovery", "Interception", "Block", "Clearance"]
ONBALL_TYPES = ["Pass", "Carry", "Ball Receipt*", "Dribble", "Shot"]

# Heatmap bins (keep constant across comparable figures)
BINS = (25, 18)

# -----------------------------
# HELPERS
# -----------------------------
def save_fig(fig, filename):
    path = os.path.join(out_dir, filename)
    fig.savefig(path, dpi=300, bbox_inches="tight")
    print("Saved:", path)

def filter_valid_locations(df_in):
    """Keep only rows where location is a list/tuple with at least 2 values, then make numeric x,y."""
    df = df_in.copy()
    mask_loc = df["location"].apply(lambda v: isinstance(v, (list, tuple)) and len(v) >= 2)
    df = df.loc[mask_loc].copy()
    df["x"] = df["location"].str[0]
    df["y"] = df["location"].str[1]
    df["x"] = pd.to_numeric(df["x"], errors="coerce")
    df["y"] = pd.to_numeric(df["y"], errors="coerce")
    df = df.dropna(subset=["x", "y"])
    return df

def base_pitch(figsize=(11, 7)):
    pitch = Pitch(
        pitch_type="statsbomb",
        pitch_color="white",
        line_color="grey",
        line_zorder=2
    )
    fig, ax = pitch.draw(figsize=figsize)
    return pitch, fig, ax

def add_header_footer(fig, ax, title, subtitle, caption):
    ax.set_title(title, fontsize=16, fontweight="bold", pad=25)
    fig.text(0.125, 0.91, subtitle, fontsize=10, wrap=True)
    fig.text(0.125, 0.02, caption, fontsize=9, wrap=True)

def plot_binned_count_heatmap(pitch, fig, ax, df_xy, bins=BINS, cmap="Blues",
                              cbar_label="Count per zone"):
    bin_stat = pitch.bin_statistic(df_xy["x"], df_xy["y"], statistic="count", bins=bins)
    hm = pitch.heatmap(bin_stat, ax=ax, cmap=cmap)
    cbar = fig.colorbar(hm, ax=ax, fraction=0.03, pad=0.04)
    cbar.set_label(cbar_label, fontsize=11, fontweight="bold")
    return hm

def plot_kde_density(pitch, fig, ax, df_xy, levels=60, thresh=0, cmap="Blues",
                     cbar_label="Action density (KDE)"):
    # kdeplot returns an Axes, so we grab the last created collection as the mappable
    pitch.kdeplot(
        df_xy["x"], df_xy["y"],
        fill=True, levels=levels, thresh=thresh, cmap=cmap, ax=ax
    )
    if len(ax.collections) == 0:
        raise RuntimeError("No KDE collections created. Check that df_xy has rows.")
    mappable = ax.collections[-1]
    cbar = fig.colorbar(mappable, ax=ax, fraction=0.03, pad=0.04)
    cbar.set_label(cbar_label, fontsize=11, fontweight="bold")
    
    # Format colorbar ticks to show relative intensity (0-100%)
    cbar_min = mappable.norm.vmin
    cbar_max = mappable.norm.vmax
    
    def intensity_formatter(x, pos):
        if cbar_max > cbar_min:
            # Normalize to 0-100%
            intensity = ((x - cbar_min) / (cbar_max - cbar_min)) * 100
            return f"{intensity:.0f}%"
        return f"{x:.1e}"
    
    cbar.ax.yaxis.set_major_formatter(FuncFormatter(intensity_formatter))
    return mappable

def plot_points(pitch, fig, ax, df_xy, title_suffix="(points)"):
    pitch.scatter(df_xy["x"], df_xy["y"], s=10, alpha=0.25, ax=ax)

# -----------------------------
# 1) PULL MATCHES + EVENTS
# -----------------------------
matches = sb.matches(competition_id=competition_id, season_id=season_id)
matches_df = matches[(matches["home_team"] == team) | (matches["away_team"] == team)].copy()

combined = []
for match_id in matches_df["match_id"]:
    combined.append(sb.events(match_id=match_id))

df = pd.concat(combined, ignore_index=True)

# Filter to player
df_player = df[df["player"] == player_name].copy()
print("Rows for player:", len(df_player))

# Clean x,y
df_player = filter_valid_locations(df_player)
print("Rows with valid x,y:", len(df_player))

# -----------------------------
# COMMON STRINGS FOR PAPER-READY LABELS
# -----------------------------
subtitle_base = f"{team} | Premier League 2015–16 | Data: StatsBomb events (statsbombpy)"
caption_base = ("Note: A heatmap shows WHERE actions are concentrated. "
                "Counts are not distance covered, and KDE density is smoothed (not exact counts).")

# -----------------------------
# MAP A: Defensive actions (COUNT per zone)
# -----------------------------
df_def = df_player[df_player["type"].isin(DEFENSIVE_TYPES)].copy()
pitch, fig, ax = base_pitch()
plot_binned_count_heatmap(pitch, fig, ax, df_def, bins=BINS, cbar_label="Defensive actions per zone (count)")

add_header_footer(
    fig, ax,
    title="Figure A — N'Golo Kanté: Defensive actions (count heatmap)",
    subtitle=subtitle_base + f" | Events: {', '.join(DEFENSIVE_TYPES)}",
    caption=caption_base + " Metric here is COUNT per zone."
)
save_fig(fig, "A_defensive_count_heatmap.png")
plt.close(fig)

# -----------------------------
# MAP B: On-ball involvement (COUNT per zone)
# -----------------------------
df_on = df_player[df_player["type"].isin(ONBALL_TYPES)].copy()
pitch, fig, ax = base_pitch()
plot_binned_count_heatmap(pitch, fig, ax, df_on, bins=BINS, cbar_label="On-ball actions per zone (count)")

add_header_footer(
    fig, ax,
    title="Figure B — N'Golo Kanté: On-ball involvement (count heatmap)",
    subtitle=subtitle_base + f" | Events: {', '.join(ONBALL_TYPES)}",
    caption=caption_base + " Metric here is COUNT per zone."
)
save_fig(fig, "B_onball_count_heatmap.png")
plt.close(fig)

# -----------------------------
# MAP C: Defensive KDE density (SMOOTHED) - IMPROVED WITH BETTER SPACING & READABILITY
# -----------------------------
pitch, fig, ax = base_pitch(figsize=(12, 8))
plot_kde_density(pitch, fig, ax, df_def, cbar_label="Relative Intensity\n(0% = Low, 100% = High)")

# Add axis labels for clarity on field measurements
ax.set_xlabel("Field Width (0-120 yards)", fontsize=12, fontweight="bold")
ax.set_ylabel("Field Length (0-80 yards)", fontsize=12, fontweight="bold")

# Improve label visibility
ax.tick_params(axis='both', which='major', labelsize=10, colors='black')
ax.xaxis.label.set_color('black')
ax.yaxis.label.set_color('black')

add_header_footer(
    fig, ax,
    title="Figure C — N'Golo Kanté: Defensive Action Density Heatmap",
    subtitle=subtitle_base + f" | Events: {', '.join(DEFENSIVE_TYPES)}",
    caption=caption_base + " Metric: Kernel Density Estimation (KDE) smoothing reveals concentration zones. Darker/warmer regions = higher defensive activity. Field: 120 yards wide × 80 yards long."
)

# Adjust layout for better spacing - key fix for title/subtitle overlap
fig.subplots_adjust(bottom=0.14, top=0.87, left=0.12, right=0.95)

save_fig(fig, "C_defensive_kde_density.png")
plt.close(fig)

# -----------------------------
# MAP D: Defensive actions split by half (COUNT per zone)
# (Requires 'period' column; StatsBomb events usually include it)
# -----------------------------
if "period" in df_def.columns:
    df_def_h1 = df_def[df_def["period"] == 1].copy()
    df_def_h2 = df_def[df_def["period"] == 2].copy()

    # Half 1
    pitch, fig, ax = base_pitch()
    plot_binned_count_heatmap(pitch, fig, ax, df_def_h1, bins=BINS, cbar_label="Defensive actions per zone (count)")
    add_header_footer(
        fig, ax,
        title="Figure D1 — Defensive actions (1st half)",
        subtitle=subtitle_base + f" | Events: {', '.join(DEFENSIVE_TYPES)}",
        caption=caption_base + " This figure shows only FIRST HALF events."
    )
    save_fig(fig, "D1_defensive_half1_count.png")
    plt.close(fig)

    # Half 2
    pitch, fig, ax = base_pitch()
    plot_binned_count_heatmap(pitch, fig, ax, df_def_h2, bins=BINS, cbar_label="Defensive actions per zone (count)")
    add_header_footer(
        fig, ax,
        title="Figure D2 — Defensive actions (2nd half)",
        subtitle=subtitle_base + f" | Events: {', '.join(DEFENSIVE_TYPES)}",
        caption=caption_base + " This figure shows only SECOND HALF events."
    )
    save_fig(fig, "D2_defensive_half2_count.png")
    plt.close(fig)
else:
    print("Skipping Map D: no 'period' column found in events.")

# -----------------------------
# MAP E: Defensive actions as RAW POINTS (no smoothing)
# -----------------------------
pitch, fig, ax = base_pitch()
plot_points(pitch, fig, ax, df_def)

add_header_footer(
    fig, ax,
    title="Figure E — N'Golo Kanté: Defensive actions (raw points)",
    subtitle=subtitle_base + f" | Events: {', '.join(DEFENSIVE_TYPES)}",
    caption=caption_base + " Each dot is a single event; no smoothing."
)
save_fig(fig, "E_defensive_raw_points.png")
plt.close(fig)

print("\nDone. Your images are in:", out_dir)

Rows for player: 5771
Rows with valid x,y: 5763
Saved: kante_maps/A_defensive_count_heatmap.png
Saved: kante_maps/B_onball_count_heatmap.png
Saved: kante_maps/C_defensive_kde_density.png
Saved: kante_maps/D1_defensive_half1_count.png
Saved: kante_maps/D2_defensive_half2_count.png
Saved: kante_maps/E_defensive_raw_points.png

Done. Your images are in: kante_maps
