In [1]:
import graph_tool.all as gt
import polars as pl
import numpy as np
import random
from collections import defaultdict
import os.path

print("Loading tracking data...")
players = pl.read_csv("nfl-big-data-bowl-2025/players.csv")
tracking = pl.read_csv(
    "nfl-big-data-bowl-2025/tracking_week_1.csv",
    null_values=["NA","na","N/A","n/a","NULL","null","None","none"]
)

pbp_file = "pbp_2022.csv"
if not os.path.isfile(pbp_file):
    print(f"NFL play-by-play data file '{pbp_file}' not found!")
    print("Please run the R script first: Rscript get_nfl_data.R")
    exit(1)

Loading tracking data...


In [3]:
def group_rows_by_frame(data, gameId, playId):
    frames = defaultdict(list)
    for row in data.iter_rows():
        frame_id = row[4]
        if (frame_id % 24 == 0 or frame_id == 1) and row[0] == gameId and row[1] == playId:
            frames[frame_id].append(row)
    return frames

def construct_graph(data, gameId, playId):
    g = gt.Graph(directed=False)
    # vertex properties
    player_id_prop = g.new_vertex_property("string")
    x_prop         = g.new_vertex_property("float")
    y_prop         = g.new_vertex_property("float")
    g.vertex_properties["player_id"] = player_id_prop
    g.vertex_properties["x"]         = x_prop
    g.vertex_properties["y"]         = y_prop
    # edge weight
    weight_prop = g.new_edge_property("float")
    g.edge_properties["weight"] = weight_prop

    vertex_dict = {}
    # add vertices
    for row in data.iter_rows():
        if (row[0]==gameId and row[1]==playId and row[3]!="football"
            and ((int(row[4])%24==0) or (int(row[4])==1))):
            pid = row[2]
            if pid!="None" and pid not in vertex_dict:
                v = g.add_vertex()
                player_id_prop[v] = pid
                vertex_dict[pid]   = v

    frames = group_rows_by_frame(data, gameId, playId)
    graphs = []
    # build graph per frame
    for frame, rows in frames.items():
        # update coords
        for row in rows:
            pid = row[2]
            if pid in vertex_dict:
                v = vertex_dict[pid]
                x_prop[v] = float(row[10])
                y_prop[v] = float(row[11])
        # clear & recreate edges
        g.clear_edges()
        verts = list(vertex_dict.values())
        n = len(verts)
        print(f"Number of vertices: {n}")
        for i in range(n):
            for j in range(i+1, n):
                v1, v2 = verts[i], verts[j]
                dx = x_prop[v1] - x_prop[v2]
                dy = y_prop[v1] - y_prop[v2]
                dist = (dx*dx + dy*dy)**0.5
                w = 1/dist if dist!=0 else 0
                e = g.add_edge(v1, v2)
                weight_prop[e] = w
        print(f"Frame: {frame} Edges: {g.num_edges()}")
        graphs.append(g.copy())
    return graphs

In [4]:
print("Getting a random play...")
unique_plays   = tracking.select(['gameId','playId']).unique()
random_idx     = random.randint(0, len(unique_plays)-1)
random_game_id = unique_plays[random_idx, 0]
random_play_id = unique_plays[random_idx, 1]
print(f"Analyzing GameID: {random_game_id}, PlayID: {random_play_id}")

play_graphs = construct_graph(tracking, random_game_id, random_play_id)
print(f"Created {len(play_graphs)} frame graphs for this play")

Getting a random play...
Analyzing GameID: 2022091104, PlayID: 3369
Number of vertices: 22
Frame: 1 Edges: 231
Number of vertices: 22
Frame: 24 Edges: 231
Number of vertices: 22
Frame: 48 Edges: 231
Number of vertices: 22
Frame: 72 Edges: 231
Number of vertices: 22
Frame: 96 Edges: 231
Created 5 frame graphs for this play


In [5]:
print("Loading play-by-play data...")
pbp_data = pl.read_csv(
    pbp_file,
    null_values=["NA","na","N/A","n/a","NULL","null","None","none"],
    infer_schema_length=10000,
    schema_overrides={"total_line": pl.Float64}
)

play_data = pbp_data.filter(pl.col('play_id') == random_play_id)

# if multiple, match by date
if len(play_data) > 1:
    gid = str(random_game_id)
    yyyy, mm, dd = gid[:4], gid[4:6], gid[6:8]
    matches = []
    for row in play_data.iter_rows():
        date = row[play_data.columns.index('game_date')] if 'game_date' in play_data.columns else ""
        if f"{yyyy}-{mm}-{dd}" in str(date):
            matches.append(row)
    play_data = pl.DataFrame([matches[0]], schema=play_data.schema) if matches else play_data[:1]

epa     = play_data.select('epa').item()
def_epa = -epa
desc = (
    play_data.select('desc').item()
    if 'desc' in play_data.columns
    else play_data.select('play_description').item()
    if 'play_description' in play_data.columns
    else "N/A"
)
print(f"Play: {desc}\nOff EPA: {epa:.4f}\nDef EPA: {def_epa:.4f}")

Loading play-by-play data...
Play: (:21) (Shotgun) 16-J.Goff pass incomplete short left to 14-A.St. Brown.
Off EPA: -0.4837
Def EPA: 0.4837


  play_data = pl.DataFrame([matches[0]], schema=play_data.schema) if matches else play_data[:1]


In [6]:
frame_metrics = []
for i, g in enumerate(play_graphs, start=1):
    print(f"\n--- Frame {i} ---")
    w = g.edge_properties["weight"]

    # Betweenness
    vb, eb = gt.betweenness(g, weight=w)
    avg_btw, max_btw = vb.a.mean(), vb.a.max()
    print(f"Betweenness – avg: {avg_btw:.4f}, max: {max_btw:.4f}")

    # Closeness
    cl = gt.closeness(g, weight=w)
    print(f"Closeness – avg: {cl.a.mean():.4f}")

    # Eigenvector
    eig = gt.eigenvector(g, weight=w)
    eigv = eig[1] if isinstance(eig, tuple) else eig
    print(f"Eigenvector – avg: {eigv.a.mean():.4f}")

    # PageRank, Katz
    pr = gt.pagerank(g, weight=w);  print(f"PageRank – avg: {pr.a.mean():.4f}")
    kz = gt.katz(g, weight=w);      print(f"Katz – avg: {kz.a.mean():.4f}")

    # Clustering
    gc = gt.global_clustering(g, weight=w)[0]
    lc = gt.local_clustering(g, weight=w)
    print(f"Global clustering: {gc:.4f}, Local clustering – avg: {lc.a.mean():.4f}")

    # Density, edge weight & strength
    nV, nE = g.num_vertices(), g.num_edges()
    density = nE / (nV*(nV-1)/2)
    aw = sum(w[e] for e in g.edges())/nE
    print(f"Density: {density:.4f}, Avg edge weight: {aw:.4f}")

    strength = g.new_vertex_property("float")
    for v in g.vertices():
        strength[v] = sum(w[e] for e in v.out_edges())
    avs, mvs = strength.a.mean(), strength.a.max()
    print(f"Node strength – avg: {avs:.4f}, max: {mvs:.4f}")

    # Robustness (remove top 20% by betweenness)
    ranking = np.argsort(vb.a)[::-1]
    g2 = g.copy()
    for v in sorted(ranking[:int(nV*0.2)], reverse=True):
        g2.remove_vertex(v)
    comps, hist = gt.label_components(g2)
    rob = max(hist)/g2.num_vertices() if g2.num_vertices()>0 else 0
    print(f"Robustness: {rob:.4f}")

    # Track for summary
    frame_metrics.append({
        'frame': i, 'avg_betweenness': avg_btw, 'max_betweenness': max_btw,
        'avg_closeness': cl.a.mean(), 'avg_eigen': eigv.a.mean(),
        'avg_pagerank': pr.a.mean(), 'avg_katz': kz.a.mean(),
        'global_clustering': gc, 'avg_local_clustering': lc.a.mean(),
        'density': density, 'avg_weight': aw,
        'avg_strength': avs, 'max_strength': mvs, 'robustness': rob
    })

# Summary table
import pandas as pd
df = pd.DataFrame(frame_metrics)
df.mean(numeric_only=True).to_frame('overall_avg').T


--- Frame 1 ---
Betweenness – avg: 0.0223, max: 0.4095
Closeness – avg: 8.2440
Eigenvector – avg: 0.2067
PageRank – avg: 0.0455
Katz – avg: 0.2132
Global clustering: 0.2034, Local clustering – avg: 0.2006
Density: 1.0000, Avg edge weight: 0.1787
Node strength – avg: 3.7530, max: 4.8900
Robustness: 1.0000

--- Frame 2 ---
Betweenness – avg: 0.0258, max: 0.5048
Closeness – avg: 11.8158
Eigenvector – avg: 0.1992
PageRank – avg: 0.0455
Katz – avg: 0.2132
Global clustering: 0.1798, Local clustering – avg: 0.1684
Density: 1.0000, Avg edge weight: 0.1405
Node strength – avg: 2.9495, max: 4.5246
Robustness: 1.0000

--- Frame 3 ---
Betweenness – avg: 0.0251, max: 0.3000
Closeness – avg: 12.6319
Eigenvector – avg: 0.1949
PageRank – avg: 0.0455
Katz – avg: 0.2132
Global clustering: 0.1953, Local clustering – avg: 0.1773
Density: 1.0000, Avg edge weight: 0.1401
Node strength – avg: 2.9419, max: 4.6088
Robustness: 1.0000

--- Frame 4 ---
Betweenness – avg: 0.0251, max: 0.3286
Closeness – avg: 13.0

Unnamed: 0,frame,global_clustering,density,avg_weight,robustness
overall_avg,3.0,0.190712,1.0,0.147459,1.0
