In [1]:
import sys
from pathlib import Path
import pickle
import numpy as np

# Put the parent of the current working directory at the front of sys.path,
# presumably so the `playstyle_utils` import below can be resolved.
# NOTE(review): this depends on where the kernel was started; it assumes the
# working directory is a direct child of the project root - confirm.
ROOT = Path.cwd().parent 
sys.path.insert(0, str(ROOT))

from playstyle_utils import SplitPossessionPhases, FilterPhases, RemoveNoise, MakeMovementChains, split_sequences_on_time_gaps

In [2]:
# Input directory: <parent of the current working directory>/data/derived.
PROJECT_ROOT = Path.cwd().resolve().parent
DATA = PROJECT_ROOT / "data" / "derived"

# NOTE: unpickling can execute arbitrary code; only load files produced by
# this project's own pipeline.
with (DATA / "match_events.pkl").open("rb") as f:
    match_events = pickle.load(f)

with (DATA / "mappings.pkl").open("rb") as f:
    maps = pickle.load(f)

# Only the team-name mapping is needed from the mappings bundle in this notebook.
team_name_mapping = maps["team_name_mapping"]

In [3]:
# For every match: split each team's actions into possession phases, run the
# phase filter (per the original note: removes unwanted actions and drops
# invalid sequences), then cut the result at time gaps.
# Results are keyed by "<game_id>_<club_name>".
match_phases = {}
total_games = len(match_events)

for game_number, (game_id, df_nice_actions) in enumerate(match_events.items(), start=1):
    print(f"Processing game {game_number} / {total_games}")

    # The splitter's result is consumed as a flat, alternating sequence:
    # [club_name, phases, club_name, phases, ...]
    clubs_and_phases = SplitPossessionPhases().split_possession_phases(
        df_nice_actions, team_name_mapping
    )

    for name_pos in range(0, len(clubs_and_phases), 2):
        club_name = clubs_and_phases[name_pos]
        filtered = FilterPhases(clubs_and_phases[name_pos + 1]).filter()

        # Split into smaller sub-sequences wherever there is a time gap
        # (>= 10 seconds per the original note; the threshold is not set here,
        # so it must live inside the helper - confirm).
        match_phases[f"{game_id}_{club_name}"] = split_sequences_on_time_gaps(filtered)

Processing game 1 / 1826
Processing game 2 / 1826
Processing game 3 / 1826
Processing game 4 / 1826
Processing game 5 / 1826
Processing game 6 / 1826
Processing game 7 / 1826
Processing game 8 / 1826
Processing game 9 / 1826
Processing game 10 / 1826
Processing game 11 / 1826
Processing game 12 / 1826
Processing game 13 / 1826
Processing game 14 / 1826
Processing game 15 / 1826
Processing game 16 / 1826
Processing game 17 / 1826
Processing game 18 / 1826
Processing game 19 / 1826
Processing game 20 / 1826
Processing game 21 / 1826
Processing game 22 / 1826
Processing game 23 / 1826
Processing game 24 / 1826
Processing game 25 / 1826
Processing game 26 / 1826
Processing game 27 / 1826
Processing game 28 / 1826
Processing game 29 / 1826
Processing game 30 / 1826
Processing game 31 / 1826
Processing game 32 / 1826
Processing game 33 / 1826
Processing game 34 / 1826
Processing game 35 / 1826
Processing game 36 / 1826
Processing game 37 / 1826
Processing game 38 / 1826
Processing game 39 / 

In [4]:
# Drop the last event of a phase if it ends at x=0.
#
# NOTE: the phase lists inside `match_phases` are edited in place, so each
# additional run of this cell can strip one more trailing event from a phase.
# Rebuild `match_phases` first if this cell has to be executed again.
for team_phases in match_phases.values():
    for phase in team_phases:
        # An empty phase has no last event; indexing [-1] would raise IndexError.
        if len(phase) == 0:
            continue
        if phase[-1]['end_x'] == 0:
            # pop() removes by position. list.remove(value) deletes the FIRST
            # element that compares equal to the value, which is the wrong
            # event whenever an earlier event equals the last one.
            phase.pop()

In [5]:
# Build "movement chains" for every game/team entry by passing its phases to
# MakeMovementChains. Per the original note, the helper splits each possession
# sequence whenever the player changes and keeps only chains of 4 consecutive
# player-involvement segments (that logic is not visible in this notebook).
match_movement_chains = {
    phase_key: MakeMovementChains(team_phases)
    for phase_key, team_phases in match_phases.items()
}

In [6]:
# Convert each movement chain into a trajectory of [x, y] points: the start
# position of every event, followed by the end position of the last event.

def _chain_to_points(chain):
    """Return the [x, y] trajectory of one movement chain.

    Args:
        chain: iterable of events, each indexable by 'start_x', 'start_y',
            'end_x' and 'end_y'.

    Returns:
        A list holding one [start_x, start_y] pair per event plus a final
        [end_x, end_y] pair taken from the last event. An empty chain gives
        an empty list.
    """
    points = []
    last_event = None
    for event in chain:
        points.append([event['start_x'], event["start_y"]])
        last_event = event
    # Only the final event contributes its end position.
    if last_event is not None:
        points.append([last_event['end_x'], last_event["end_y"]])
    return points


# Same keys as `match_movement_chains`; each value is a list of trajectories.
match_movement_chains_coords = {
    chain_key: [_chain_to_points(chain) for chain in movement_chain]
    for chain_key, movement_chain in match_movement_chains.items()
}

In [None]:
# Remove noisy/invalid trajectories from the coordinate lists using the
# rule-based filters in RemoveNoise, then rescale every point.

# Divisors applied to the x and y coordinates.
# NOTE(review): presumably the pitch length and width (105 x 68), which would
# map coordinates into [0, 1] - confirm against the data source.
X_SCALE = 105
Y_SCALE = 68

# Results are collected in a fresh dict and bound to the original name only
# once everything succeeded. If the loop is interrupted or RemoveNoise raises,
# the input is not left half-rescaled (provided RemoveNoise does not modify
# its argument in place - confirm).
# NOTE: this cell still reads and rebinds the same name, so running it twice
# in a row filters and rescales the data twice. Re-run the cell that builds
# `match_movement_chains_coords` before executing this one again.
cleaned_and_scaled = {}
for game_id, raw_chains in match_movement_chains_coords.items():
    print(game_id)
    kept_chains = RemoveNoise(raw_chains).remove_noise()
    cleaned_and_scaled[game_id] = [
        [[x / X_SCALE, y / Y_SCALE] for x, y in chain]
        for chain in kept_chains
    ]

match_movement_chains_coords = cleaned_and_scaled


In [8]:
# Flatten all trajectories across games/teams into a single list, turning each
# trajectory into a float64 NumPy array. Order follows the dict's iteration
# order, then the order of the chains within each entry.
movement_chain_coords = [
    np.array(trajectory, dtype=np.double)
    for team_trajectories in match_movement_chains_coords.values()
    for trajectory in team_trajectories
]

In [9]:
# Output directory: <parent of the current working directory>/data/derived,
# created if it does not exist yet.
PROJECT_ROOT = Path.cwd().resolve().parent
OUT = PROJECT_ROOT / "data" / "derived"
OUT.mkdir(parents=True, exist_ok=True)

# Persist the flattened trajectories, the movement chains and the per-game
# coordinate lists, one pickle file each, written in this order.
for file_name, payload in (
    ("movement_chain_coords.pkl", movement_chain_coords),
    ("match_movement_chains.pkl", match_movement_chains),
    ("match_movement_chains_coords.pkl", match_movement_chains_coords),
):
    with open(OUT / file_name, "wb") as f:
        pickle.dump(payload, f)