In [125]:
import math

import pandas as pd
import torch

In [126]:
# Read the three raw tables (games, stones, ends) from the local data folder.
RAW_DATA_DIR = "/Users/brentkong/Documents/curling/data"
games, stones, ends = (
    pd.read_csv(f"{RAW_DATA_DIR}/{table_name}.csv")
    for table_name in ("Games", "Stones", "Ends")
)

In [127]:
def _build_uid(frame, id_columns):
    """Join integer-cast ID columns into one underscore-separated string per row.

    Args:
        frame: DataFrame containing every column named in ``id_columns``.
        id_columns: Column names, in the order they should appear in the UID.

    Returns:
        A Series aligned with ``frame.index`` holding strings such as
        ``"<CompetitionID>_<SessionID>_<GameID>"``.

    Raises:
        ValueError: If a listed column holds a value that cannot be cast to
            int (for example NaN).
    """
    parts = [frame[column].astype(int).astype(str) for column in id_columns]
    uid = parts[0]
    for part in parts[1:]:
        uid = uid + "_" + part
    return uid


# Columns that identify a single game; every other UID extends this prefix.
GAME_KEY = ["CompetitionID", "SessionID", "GameID"]

games["GameUID"] = _build_uid(games, GAME_KEY)

stones["GameUID"] = _build_uid(stones, GAME_KEY)
# Build ShotUID first so the manual TeamID correction below can be applied
# BEFORE EndUID is derived. EndUID embeds TeamID, so deriving it earlier would
# leave the corrected shot with an EndUID that still carries the old TeamID.
shot_uid = _build_uid(stones, GAME_KEY + ["EndID", "ShotID"])
stones.loc[shot_uid == "24250026_18_1_9_18", "TeamID"] = 37
stones["EndUID"] = _build_uid(stones, GAME_KEY + ["TeamID", "EndID"])
stones["ShotUID"] = shot_uid

ends["GameUID"] = _build_uid(ends, GAME_KEY)
ends["TeamUID"] = _build_uid(ends, GAME_KEY + ["TeamID"])
ends["EndUID"] = _build_uid(ends, GAME_KEY + ["TeamID", "EndID"])


In [128]:
# Columns copied from the games table onto every end row.
game_info_columns = ["GameUID", "NOC1", "NOC2", "TeamID1", "TeamID2", "LSFE"]

# Result == 9 is rewritten to 0 (NOTE(review): presumably 9 is a sentinel code
# rather than a real score - confirm against the data dictionary).
# Missing PowerPlay values become 0.
ends = ends.assign(
    Result=ends["Result"].mask(ends["Result"].eq(9), 0),
    PowerPlay=ends["PowerPlay"].fillna(0),
)
# Collapse PowerPlay code 2 into 1 so the column acts as a 0/1 flag.
ends["PowerPlayBool"] = ends["PowerPlay"].replace(2, 1)

# Running score per team within a game; rows must be in end order first.
ends = ends.sort_values(["GameUID", "TeamUID", "EndID"])
ends["CumulativeScore"] = ends.groupby("TeamUID")["Result"].cumsum()

ends = ends.merge(games[game_info_columns], on="GameUID", how="left").reset_index(drop=True)

# Attach each end's result and power-play code to its stones.
end_outcomes = ends[["EndUID", "Result", "PowerPlay"]]
stones = stones.merge(end_outcomes, on="EndUID", how="left").reset_index(drop=True)


In [129]:
def assign_hammer(row):
    """Return 1 when the row's ShotID is even, otherwise 0.

    Args:
        row: Any object exposing a numeric ``ShotID`` attribute.

    Returns:
        int: 1 for an even ShotID, 0 for anything else.
    """
    return 1 if row.ShotID % 2 == 0 else 0

# Flag stones with an even ShotID as 1 and all others as 0. This is the
# vectorised equivalent of applying assign_hammer row by row.
stones["Has_Hammer"] = stones["ShotID"].mod(2).eq(0).astype(int)

# Deduplicate the (EndUID, Has_Hammer) pairs BEFORE merging so `ends` is not
# temporarily expanded to one row per stone. The trailing drop_duplicates is
# kept so the final frame matches the previous row set exactly.
hammer_by_end = stones[["EndUID", "Has_Hammer"]].drop_duplicates()
ends = (
    ends.merge(hammer_by_end, on="EndUID", how="left")
    .drop_duplicates()
    .reset_index(drop=True)
)

In [130]:
def get_opp_id(row):
    """Return the ID of the other team listed on the row.

    Args:
        row: Any object exposing ``TeamID``, ``TeamID1`` and ``TeamID2``.

    Returns:
        ``TeamID2`` when ``TeamID`` equals ``TeamID1``; ``TeamID1`` when it
        equals ``TeamID2``; ``None`` when it matches neither.
    """
    team = row.TeamID
    if team == row.TeamID1:
        return row.TeamID2
    return row.TeamID1 if team == row.TeamID2 else None

# Look up, row by row, which of the game's two teams is the opponent.
opponent_ids = ends.apply(get_opp_id, axis="columns")
ends["OpponentID"] = opponent_ids

In [131]:
def _opponent_end_uid(r):
    """Build the EndUID of the opponent's row for the same game and end."""
    return f"{int(r.CompetitionID)}_{int(r.SessionID)}_{int(r.GameID)}_{int(r.OpponentID)}_{int(r.EndID)}"


ends["OpponentEndUID"] = ends.apply(_opponent_end_uid, axis=1)

# Self-join: fetch the opponent's running score for the same end.
opponent_totals = ends[["EndUID", "CumulativeScore"]].rename(
    columns={"EndUID": "OpponentEndUID", "CumulativeScore": "OpponentCumulative"}
)
ends = ends.merge(opponent_totals, on="OpponentEndUID", how="left")

ends["ScoreDiff"] = ends["CumulativeScore"] - ends["OpponentCumulative"]

# Score difference going INTO each end: the previous end's ScoreDiff for the
# same team in the same game, with 0 where there is no previous value.
ends = ends.sort_values(["GameUID", "TeamID", "EndID"])
score_diff_by_team = ends.groupby(["GameUID", "TeamID"])["ScoreDiff"]
ends["PrevScoreDiff"] = score_diff_by_team.shift(1).fillna(0)


In [132]:
# Self-join: fetch the opponent's Result for the same end.
opponent_results = ends[["EndUID", "Result"]].rename(
    columns={"EndUID": "OpponentEndUID", "Result": "OpponentResult"}
)
ends = ends.merge(opponent_results, on="OpponentEndUID", how="left")

# Result margin of the end from this team's point of view.
ends["EndDiff"] = ends["Result"] - ends["OpponentResult"]

# Margin of the previous end for the same team in the same game,
# with 0 where there is no previous value.
ends = ends.sort_values(["GameUID", "TeamID", "EndID"])
end_diff_by_team = ends.groupby(["GameUID", "TeamID"])["EndDiff"]
ends["PrevEndDiff"] = end_diff_by_team.shift(1).fillna(0)

In [133]:
def compute_geometry(board_x, board_y, center=(750, 800), cluster_radius=300):
    """Summarise one board position with four geometric features.

    Only stones with ``0 < x < 1500`` and ``0 < y < 3000`` are considered;
    coordinates such as 0 or 4095 fall outside those bounds and are ignored
    (NOTE(review): presumably those are "not in play" sentinels - confirm).

    Args:
        board_x: Iterable of stone x coordinates.
        board_y: Iterable of stone y coordinates, paired with ``board_x``.
        center: ``(x, y)`` reference point distances and angles are measured
            from. Defaults to the previously hard-coded ``(750, 800)``.
        cluster_radius: Distance from ``center`` under which a stone counts
            as clustered. Defaults to the previously hard-coded 300.

    Returns:
        A 4-tuple ``(burial, angle, cluster, openness)``:
            burial   - distance from ``center`` to the closest stone.
            angle    - ``atan2`` angle (radians) of that closest stone
                       relative to ``center``.
            cluster  - fraction of considered stones closer than
                       ``cluster_radius`` to ``center``.
            openness - count of stones with x greater than the centre x
                       minus the count with x smaller than it.
        ``(0, 0, 0, 0)`` is returned when no stone passes the bounds filter.
    """
    cx, cy = center
    house = [(x, y) for x, y in zip(board_x, board_y) if 0 < x < 1500 and 0 < y < 3000]
    if not house:
        return 0, 0, 0, 0

    def distance_to_center(stone):
        return math.hypot(stone[0] - cx, stone[1] - cy)

    # Burial depth and guard angle both describe the stone closest to the
    # centre. The angle was previously taken from house[0], i.e. whichever
    # in-bounds stone happened to be listed first, not the closest one.
    closest = min(house, key=distance_to_center)
    burial = distance_to_center(closest)
    angle = math.atan2(closest[1] - cy, closest[0] - cx)

    # Clustering: share of stones inside the cluster radius.
    cluster = sum(distance_to_center(stone) < cluster_radius for stone in house) / len(house)

    # Side balance: stones right of centre minus stones left of centre.
    left_count = sum(x < cx for x, _ in house)
    right_count = sum(x > cx for x, _ in house)
    openness = right_count - left_count

    return burial, angle, cluster, openness

def _stone_row_geometry(stone_row):
    """Collect the twelve stone_<i>_x / stone_<i>_y values of a row and score them."""
    xs = [stone_row[f"stone_{i}_x"] for i in range(1, 13)]
    ys = [stone_row[f"stone_{i}_y"] for i in range(1, 13)]
    return compute_geometry(xs, ys)


stones = stones.copy()
# One (burial, angle, cluster, openness) tuple per stone row,
# transposed into four column-wise tuples.
geometry_per_row = stones.apply(_stone_row_geometry, axis=1)
burial_depth, guard_angle, cluster_index, side_openness = zip(*geometry_per_row)
stones["BurialDepth"] = burial_depth
stones["GuardAngle"] = guard_angle
stones["ClusterIndex"] = cluster_index
stones["SideOpenness"] = side_openness

In [134]:
# Explicit PowerPlay-code -> feature-name mapping. Previously the two names
# were assigned positionally to whatever columns unstack() produced, which
# raises when only one code is present and relies on sort order for labelling.
PP_SIDE_COLUMNS = {1: "Tendency_PP_Right", 2: "Tendency_PP_Left"}

# Share of each non-zero PowerPlay code among the rows grouped by OpponentID.
# NOTE(review): grouping by OpponentID means each row later receives the mix
# of power plays recorded on rows whose opponent is that ID - confirm this is
# the intended "tendency" rather than grouping by TeamID.
pp_shares = (
    ends[ends["PowerPlay"] != 0]  # rows without a power play carry no side
    .groupby("OpponentID")["PowerPlay"]
    .value_counts(normalize=True)
    .unstack()
    .fillna(0)
)

unexpected_codes = [code for code in pp_shares.columns if code not in PP_SIDE_COLUMNS]
if unexpected_codes:
    raise ValueError(f"Unexpected PowerPlay codes: {unexpected_codes}")

tendency = (
    pp_shares
    .reindex(columns=list(PP_SIDE_COLUMNS), fill_value=0)  # guarantee both sides exist
    .rename(columns=PP_SIDE_COLUMNS)
    .rename_axis(columns=None)
)

tendency_columns = list(PP_SIDE_COLUMNS.values())
ends = ends.merge(tendency, on="OpponentID", how="left")
# OpponentIDs with no recorded power play get 0 for both sides.
ends[tendency_columns] = ends[tendency_columns].fillna(0)

In [135]:
# Average the per-stone geometry features over each end and attach them to ends.
GEOMETRY_COLUMNS = ["BurialDepth", "GuardAngle", "ClusterIndex", "SideOpenness"]

geom = stones.groupby("EndUID")[GEOMETRY_COLUMNS].mean()
ends = ends.merge(geom, on="EndUID", how="left")
# Zero-fill only the geometry features (ends with no stone rows). The previous
# frame-wide fillna(0) also overwrote missing values in every unrelated column.
ends[GEOMETRY_COLUMNS] = ends[GEOMETRY_COLUMNS].fillna(0)

In [136]:
def _end_diff_shares(rows):
    """Map each EndDiff value in ``rows`` to its count divided by ``len(rows)``."""
    total = len(rows)
    counts = rows["EndDiff"].value_counts().to_dict()
    return {diff: count / total for diff, count in counts.items()}


# Split ends by whether a power play was used, then tabulate the EndDiff
# distribution for ends 1 through 8 within each split.
pp_varient = {
    "PP": ends[ends["PowerPlayBool"] == 1],
    "NO_PP": ends[ends["PowerPlayBool"] == 0],
}
final = {
    action: {
        end: _end_diff_shares(variant[variant["EndID"] == end])
        for end in range(1, 9)
    }
    for action, variant in pp_varient.items()
}
print(final)

{'PP': {1: {}, 2: {-1: 1.0}, 3: {1: 0.4666666666666667, -1: 0.2, 3: 0.13333333333333333, -2: 0.13333333333333333, 2: 0.06666666666666667}, 4: {1: 0.48148148148148145, 2: 0.18518518518518517, -1: 0.1111111111111111, -2: 0.1111111111111111, -3: 0.07407407407407407, 3: 0.037037037037037035}, 5: {1: 0.3246753246753247, 3: 0.22077922077922077, 2: 0.22077922077922077, -1: 0.15584415584415584, -2: 0.03896103896103896, 4: 0.025974025974025976, -3: 0.012987012987012988}, 6: {1: 0.33170731707317075, 2: 0.25853658536585367, 3: 0.16585365853658537, -1: 0.15609756097560976, -2: 0.02926829268292683, 5: 0.024390243902439025, 4: 0.014634146341463415, -3: 0.00975609756097561, 0: 0.004878048780487805, -4: 0.004878048780487805}, 7: {2: 0.3269230769230769, 1: 0.28365384615384615, 3: 0.14423076923076922, -1: 0.10576923076923077, -2: 0.052884615384615384, 4: 0.04807692307692308, -3: 0.014423076923076924, 5: 0.009615384615384616, 6: 0.009615384615384616, -4: 0.004807692307692308}, 8: {1: 0.35384615384615387,

In [137]:
# Feature subset written out as the training table.
bayesian_columns = ["EndUID", "Has_Hammer", "PowerPlayBool", "EndID", "PrevScoreDiff"]
bayesian_training = ends[bayesian_columns]

# Write the training subset and the three processed tables as CSV, without the index.
PROCESSED_DIR = "/Users/brentkong/Documents/curling/data_processing/processed_data"
tables_to_export = {
    "bayesian_training": bayesian_training,
    "ends_processed": ends,
    "games_processed": games,
    "stones_processed": stones,
}
for file_stem, table in tables_to_export.items():
    table.to_csv(f"{PROCESSED_DIR}/{file_stem}.csv", index=False)