In [5]:
pip install spacy; torch

Collecting spacy
  Downloading spacy-3.8.11-cp313-cp313-macosx_11_0_arm64.whl.metadata (27 kB)
Collecting spacy-legacy<3.1.0,>=3.0.11 (from spacy)
  Using cached spacy_legacy-3.0.12-py2.py3-none-any.whl.metadata (2.8 kB)
Collecting spacy-loggers<2.0.0,>=1.0.0 (from spacy)
  Using cached spacy_loggers-1.0.5-py3-none-any.whl.metadata (23 kB)
Collecting murmurhash<1.1.0,>=0.28.0 (from spacy)
  Downloading murmurhash-1.0.15-cp313-cp313-macosx_11_0_arm64.whl.metadata (2.3 kB)
Collecting cymem<2.1.0,>=2.0.2 (from spacy)
  Downloading cymem-2.0.13-cp313-cp313-macosx_11_0_arm64.whl.metadata (9.7 kB)
Collecting preshed<3.1.0,>=3.0.2 (from spacy)
  Downloading preshed-3.0.12-cp313-cp313-macosx_11_0_arm64.whl.metadata (2.5 kB)
Collecting thinc<8.4.0,>=8.3.4 (from spacy)
  Downloading thinc-8.3.10-cp313-cp313-macosx_11_0_arm64.whl.metadata (15 kB)
Collecting wasabi<1.2.0,>=0.9.1 (from spacy)
  Using cached wasabi-1.1.3-py3-none-any.whl.metadata (28 kB)
Collecting srsly<3.0.0,>=2.4.3 (from spacy)
 

In [None]:
import math
import os

import pandas as pd
import spacy
import torch
from torch.utils.data import Dataset

nlp = spacy.load("en_core_web_sm")

# Load data. The directory can be overridden through the CURLING_DATA_DIR
# environment variable; the default is the original hard-coded location.
DATA_DIR = os.environ.get("CURLING_DATA_DIR", "/Users/brentkong/Documents/curling/data")
ends = pd.read_csv(os.path.join(DATA_DIR, "Ends.csv"))
games = pd.read_csv(os.path.join(DATA_DIR, "Games.csv"))
stones = pd.read_csv(os.path.join(DATA_DIR, "Stones.csv"))


def _make_uid(df, cols):
    """Join the listed key columns of ``df`` into one '_'-separated string per row.

    Each column is converted with its own dtype, so an integer ID is never
    rendered as "1.0" merely because another column in the same row is float
    (which is what a row-wise ``apply`` can do).
    """
    parts = [df[c].astype(str) for c in cols]
    return parts[0].str.cat(parts[1:], sep="_")


# Create unique IDs
_GAME_KEY = ["CompetitionID", "SessionID", "GameID"]
games["GameUID"] = _make_uid(games, _GAME_KEY)
# ends needs the same GameUID column: it is the join key used just below.
ends["GameUID"] = _make_uid(ends, _GAME_KEY)
ends["EndUID"] = _make_uid(ends, _GAME_KEY + ["TeamID", "EndID"])
stones["ThrowUID"] = _make_uid(stones, _GAME_KEY + ["TeamID", "EndID", "ShotID"])

# Attach opponent info into ends table
ends = ends.merge(games[["GameUID","NOC1","NOC2","TeamID1","TeamID2","LSFE"]], on="GameUID", how="left")

# Determine opponent NOC
def get_opp_noc(row):
    """Return the NOC of the side facing ``row.TeamID``.

    ``row`` must expose ``TeamID``, ``TeamID1``, ``TeamID2``, ``NOC1`` and
    ``NOC2``. When the team is listed as ``TeamID1`` the result is ``NOC2``;
    when it is listed as ``TeamID2`` the result is ``NOC1``; when it matches
    neither, the result is ``None``.
    """
    team = row.TeamID
    if team == row.TeamID1:
        opponent = row.NOC2
    elif team == row.TeamID2:
        opponent = row.NOC1
    else:
        opponent = None
    return opponent

# Row-wise lookup: every end row receives the NOC of the team it faced.
ends["OpponentNOC"] = ends.apply(get_opp_noc, axis="columns")

# Compute hammer possession per end (mixed doubles rule)
def assign_hammer(df):
    """Label every end row with the side that holds the hammer (last stone).

    Required columns: ``GameUID``, ``EndID``, ``TeamID``, ``Result``, ``LSFE``.
    Optional columns: ``TeamID1`` / ``TeamID2``. When present they translate a
    row's ``TeamID`` into a side number (1 or 2), in the same way
    ``get_opp_noc`` matches teams. When absent, ``TeamID`` itself is treated
    as the side number.

    The hammer is tracked once per (game, end), not once per row, and is keyed
    by game so that games sharing an end number do not overwrite each other.

    Transition applied after each end (mixed doubles):
      * exactly one side scored -> the *other* side has the hammer next end
        (so a steal leaves the hammer where it was);
      * nobody scored, or the scorer cannot be identified -> the hammer
        switches sides.

    NOTE(review): assumes ``Result`` is the points scored by the row's own
    team in that end, and that ``LSFE == 0`` means side 2 holds the hammer in
    the first end (the mapping used by the previous implementation) - confirm
    both against the data dictionary.

    Returns a copy of ``df`` sorted by ``GameUID`` and ``EndID`` with two new
    columns: ``HammerTeam`` (side number 1/2, ``None`` for rows without a
    ``GameUID``) and ``HasHammer`` (1 when the row's team holds the hammer,
    otherwise 0).
    """
    df = df.sort_values(["GameUID", "EndID"])

    # Side number of each row's team, as a plain list aligned with row order.
    if {"TeamID1", "TeamID2"}.issubset(df.columns):
        is_side1 = (df["TeamID"] == df["TeamID1"]).tolist()
        is_side2 = (df["TeamID"] == df["TeamID2"]).tolist()
        side = [1 if a else 2 if b else 0 for a, b in zip(is_side1, is_side2)]
    else:
        side = df["TeamID"].tolist()

    # One pass over the sorted rows: remember each game's first-end hammer and,
    # for every (game, end), which sides put points on the board.
    first_side = {}
    scorers_by_end = {}
    for game, end, team_side, result, lsfe in zip(
        df["GameUID"], df["EndID"], side, df["Result"], df["LSFE"]
    ):
        if pd.isna(game):
            continue  # rows without a game key get no hammer label
        first_side.setdefault(game, 2 if lsfe == 0 else 1)
        scorers = scorers_by_end.setdefault((game, end), set())
        if result > 0:  # NaN compares False, so missing results count as "no score"
            scorers.add(team_side)

    # Dicts keep insertion order and the rows were sorted, so ends are visited
    # in ascending order within each game.
    hammer = {}
    holder_by_game = {}
    for (game, end), scorers in scorers_by_end.items():
        holder = holder_by_game.get(game, first_side[game])
        hammer[(game, end)] = holder
        scoring_sides = scorers & {1, 2}
        if len(scoring_sides) == 1:
            (scorer,) = scoring_sides
            holder_by_game[game] = 2 if scorer == 1 else 1
        else:
            holder_by_game[game] = 2 if holder == 1 else 1

    df["HammerTeam"] = [hammer.get(key) for key in zip(df["GameUID"], df["EndID"])]
    df["HasHammer"] = [int(s == h) for s, h in zip(side, df["HammerTeam"])]
    return df

ends = assign_hammer(ends)

# Geometry feature extraction
def compute_geometry(board_x, board_y, *, center=(750, 1500), bounds=(1500, 3000), cluster_radius=300):
    """Summarise one board position as four scalar geometry features.

    Parameters
    ----------
    board_x, board_y : iterables of numbers
        Parallel x / y coordinates, one pair per stone.
    center : (x, y), keyword-only
        Reference point that distances and angles are measured from.
    bounds : (x_max, y_max), keyword-only
        A stone is kept only if ``0 < x < x_max`` and ``0 < y < y_max``.
        Coordinates that are exactly 0, out of range, or NaN fail these
        comparisons and are ignored.
    cluster_radius : number, keyword-only
        Distance from ``center`` below which a stone counts as clustered.

    Returns
    -------
    (burial, angle, cluster, openness)
        burial   - distance from ``center`` to the nearest kept stone
        angle    - ``atan2`` direction in radians from ``center`` to that
                   same nearest stone
        cluster  - fraction of kept stones closer than ``cluster_radius``
        openness - kept stones with x > center x, minus those with x < center x
        ``(0, 0, 0, 0)`` is returned when no stone passes the filter.
    """
    cx, cy = center
    x_max, y_max = bounds
    in_play = [(x, y) for x, y in zip(board_x, board_y) if 0 < x < x_max and 0 < y < y_max]
    if not in_play:
        return 0, 0, 0, 0

    # Distance of every kept stone from the reference point, computed once.
    distances = [math.hypot(x - cx, y - cy) for x, y in in_play]

    # Burial depth and guard angle both describe the stone nearest the center;
    # previously the angle was taken from whichever stone happened to be first.
    nearest = min(range(len(in_play)), key=distances.__getitem__)
    burial = distances[nearest]
    near_x, near_y = in_play[nearest]
    angle = math.atan2(near_y - cy, near_x - cx)

    # Clustering: share of stones packed inside cluster_radius.
    cluster = sum(d < cluster_radius for d in distances) / len(in_play)

    # Side balance: stone count right of center minus stone count left of it.
    left_count = sum(x < cx for x, _ in in_play)
    right_count = sum(x > cx for x, _ in in_play)
    openness = right_count - left_count

    return burial, angle, cluster, openness

# Work on a private copy, evaluate the geometry features once per stone row,
# then fan the four results out into their own columns.
stones = stones.copy()
_x_cols = [f"stone_{i}_x" for i in range(1, 13)]
_y_cols = [f"stone_{i}_y" for i in range(1, 13)]


def _row_geometry(r):
    """Feed one row's twelve stone coordinates to compute_geometry."""
    return compute_geometry([r[c] for c in _x_cols], [r[c] for c in _y_cols])


_burial, _angle, _cluster, _openness = zip(*stones.apply(_row_geometry, axis=1))
stones["BurialDepth"] = _burial
stones["GuardAngle"] = _angle
stones["ClusterIndex"] = _cluster
stones["SideOpenness"] = _openness

# Aggregate opponent tendencies
# Share of ends per PowerPlay code, for each opponent NOC.
# NOTE(review): assumes PowerPlay is coded 1 = right, 2 = left, with 0 or a
# missing value meaning "no power play" (the order implied by the column names
# below) - confirm against the data dictionary.
# Missing values are counted as 0 rather than dropped, and the columns are
# pinned to the three codes so the renaming cannot fail or shift when one code
# never occurs in the data.
tendency = (
    ends.assign(PowerPlay=ends["PowerPlay"].fillna(0))
    .groupby("OpponentNOC")["PowerPlay"]
    .value_counts(normalize=True)
    .unstack()
    .reindex(columns=[0, 1, 2])
    .fillna(0)
)
tendency.columns = ["PP_None","PP_Right","PP_Left"]
ends = ends.merge(tendency, on="OpponentNOC", how="left")

# Merge geometry features into ends
# EndID is only an end number within a game, so the average has to be taken
# (and joined back) per game *and* end; keying on EndID alone pooled the stones
# of every game sharing that end number.
# NOTE(review): add "TeamID" to the key if per-team board averages are wanted.
_end_key = ["CompetitionID", "SessionID", "GameID", "EndID"]
_geom_cols = ["BurialDepth","GuardAngle","ClusterIndex","SideOpenness"]
geom = stones.groupby(_end_key, as_index=False)[_geom_cols].mean()
# Ends without any stone rows get 0 for every geometry feature (the fill also
# replaces any other missing value in the table, as before).
ends = ends.merge(geom, on=_end_key, how="left").fillna(0)

# Final modeling table
model_columns = [
    # identifiers
    "GameUID", "TeamID", "OpponentNOC", "EndID",
    # outcome and situation
    "Result", "PowerPlay", "HasHammer",
    # opponent power-play tendencies
    "PP_Right", "PP_Left",
    # board geometry
    "BurialDepth", "GuardAngle", "ClusterIndex", "SideOpenness",
]
model_df = ends[model_columns]

# Persist the table without the index, then confirm on stdout.
output_path = "/Users/brentkong/Documents/curling/model_ready_powerplay_ev.csv"
model_df.to_csv(output_path, index=False)
print("Model table saved as model_ready_powerplay_ev.csv")
