Updated in v4:

1. I also added animations with impact event markers.

Updated in v3:

1. I just found this [message from the host](https://www.kaggle.com/c/nfl-impact-detection/discussion/198728#1087657) and, to account for it, the start of the tracking window has to be shifted to 0.1 seconds before the 'ball_snap' event.

> The videos begin 10 frames before the snap. The tracking data contains an "event" column in which the "ball_snap" is recorded. The Sideline and Endzone views have been time-synced such that the snap occurs 10 frames into the video. This time alignment should be considered to be accurate to within +/- 3 frames or 0.05 seconds (video data is recorded at approximately 59.94 frames per second). If you do some math with frame rate etc., you can align the tracking data with the video data. It will be close, but not exact because the video data is recorded at 60 HZ and the NGS data at 10 HZ.

2. `make_alignment` function had a bug. Fixed.


Updated in v2:

1. Make the animation larger so that you can see the player better.
2. Annotate `train_player_tracking.csv` with impact event.

## About 

In this notebook, I create animated datasets of `train_player_tracking.csv` and `test_player_tracking.csv` using `matplotlib`.
I try to align it with the video files, so it can be used to help find the impact frame, or perhaps even to find where on the field the impact occurred.

In [None]:
import datetime as dt
import warnings

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from IPython.display import Video
from pathlib import Path

from matplotlib import animation
from matplotlib import patches
from tqdm.notebook import tqdm

# Silence every warning category for the rest of the notebook session.
warnings.simplefilter("ignore")

## Utilities

Copied from https://www.kaggle.com/samhuddleston/nfl-1st-and-future-getting-started , but originally used in https://www.kaggle.com/robikscube/nfl-big-data-bowl-plotting-player-position/notebook.

In [None]:
def create_football_field(linenumbers=True,
                          endzones=True,
                          highlight_line=False,
                          highlight_line_number=50,
                          highlighted_name='Line of Scrimmage',
                          fifty_is_los=False,
                          figsize=(12, 6.33)):
    """
    Draw an empty football field and return it so plays can be plotted on top.

    The field occupies x in [0, 120] and y in [0, 53.3] (presumably yards,
    matching the tracking data -- confirm against the data source), with a
    10-unit end zone at each end of the x axis.

    Parameters
    ----------
    linenumbers : bool
        Draw the yard numbers (10 ... 50 ... 10) along both sidelines.
    endzones : bool
        Shade the two end zones; also restricts the hash marks to the
        area between them.
    highlight_line : bool
        Draw a yellow vertical line with a text label.
    highlight_line_number : int or float
        Position of the highlighted line, counted from the left goal line
        (10 is added to skip the left end zone).
    highlighted_name : str
        Label written next to the highlighted line.
    fifty_is_los : bool
        Mark x = 60 (midfield) in gold as the player yardline at snap.
    figsize : tuple
        Figure size forwarded to ``plt.subplots``.

    Returns
    -------
    (fig, ax)
        The matplotlib figure and axes holding the field.
    """
    fig, ax = plt.subplots(1, figsize=figsize)

    # Playing surface (field color is forestgreen).
    rect = patches.Rectangle((0, 0), 120, 53.3, linewidth=0.1,
                             edgecolor='r', facecolor='forestgreen', zorder=0)
    ax.add_patch(rect)

    # Every 10-unit line plus the outer boundary, drawn as a single path.
    ax.plot([10, 10, 10, 20, 20, 30, 30, 40, 40, 50, 50, 60, 60, 70, 70, 80,
             80, 90, 90, 100, 100, 110, 110, 120, 0, 0, 120, 120],
            [0, 0, 53.3, 53.3, 0, 0, 53.3, 53.3, 0, 0, 53.3, 53.3, 0, 0, 53.3,
             53.3, 0, 0, 53.3, 53.3, 0, 0, 53.3, 53.3, 53.3, 0, 0, 53.3],
            color='white')
    if fifty_is_los:
        ax.plot([60, 60], [0, 53.3], color='gold')
        ax.text(62, 50, '<- Player Yardline at Snap', color='gold')

    # Endzones
    if endzones:
        # Rectangle takes (xy, width, height): both end zones are 10 wide.
        # The right one starts at x = 110 and must end at the field edge
        # (x = 120), not extend 120 units past its origin.
        for left_edge in (0, 110):
            ax.add_patch(patches.Rectangle((left_edge, 0), 10, 53.3,
                                           linewidth=0.1,
                                           edgecolor='r',
                                           facecolor='blue',
                                           alpha=0.2,
                                           zorder=0))

    ax.set_xlim(0, 120)
    ax.set_ylim(-5, 58.3)
    ax.axis('off')

    if linenumbers:
        for x in range(20, 110, 10):
            # Numbers count up to midfield and back down on the other side.
            numb = x if x <= 50 else 120 - x
            ax.text(x, 5, str(numb - 10),
                    horizontalalignment='center',
                    fontsize=20,  # fontname='Arial',
                    color='white')
            # Same number on the far sideline, turned upside down.
            ax.text(x - 0.95, 53.3 - 5, str(numb - 10),
                    horizontalalignment='center',
                    fontsize=20,  # fontname='Arial',
                    color='white', rotation=180)

    # Hash marks: kept out of the end zones when those are drawn.
    if endzones:
        hash_range = range(11, 110)
    else:
        hash_range = range(1, 120)

    for x in hash_range:
        ax.plot([x, x], [0.4, 0.7], color='white')
        ax.plot([x, x], [53.0, 52.5], color='white')
        ax.plot([x, x], [22.91, 23.57], color='white')
        ax.plot([x, x], [29.73, 30.39], color='white')

    if highlight_line:
        hl = highlight_line_number + 10
        ax.plot([hl, hl], [0, 53.3], color='yellow')
        ax.text(hl + 2, 50, '<- {}'.format(highlighted_name),
                color='yellow')
    return fig, ax

## Load Data

In [None]:
# Training tracking data, with timestamps parsed into datetimes.
track_data = pd.read_csv("../input/nfl-impact-detection/train_player_tracking.csv")
track_data["time"] = pd.to_datetime(track_data["time"])
# Marker color per row: "black" when the player label contains "H", otherwise "white".
track_data["color"] = [
    "black" if "H" in player else "white"
    for player in track_data["player"]
]
track_data.head()

In [None]:
# Training labels; the code below reads its gameKey, playID, view, frame, label and impact columns.
train_labels = pd.read_csv("../input/nfl-impact-detection/train_labels.csv")
train_labels.head()

## Create train_player_tracking.csv with impact annotation

I modify `train_player_tracking.csv` to only include tracking data that is in the video and annotate the tracking data points where impacts occurred.
To do this, I first align the tracking data with the video frames, and then determine the time at which each impact occurred by picking the tracking timestamp closest to the impact frame.

In [None]:
def make_alignment(train_track: pd.DataFrame, train_label: pd.DataFrame, video_dir: Path, game_key: int, play_id: int):
    """Cut one play's tracking data to the video window and flag impact rows.

    The window starts 0.1 seconds before the play's first ``ball_snap``
    event and lasts ``n / fps`` seconds, where ``n`` is the number of
    distinct frames labelled for the play and ``fps`` is read from the
    play's Endzone video. For every Endzone label with ``impact == 1``, the
    tracking row of that player closest in time to the labelled frame gets
    ``impact = 1``.

    Args:
        train_track: Tracking data with at least the columns gameKey,
            playID, time (datetime), event and player.
        train_label: Labels with at least the columns gameKey, playID,
            view, frame, label and impact.
        video_dir: Directory holding ``{gameKey}_{playID:06d}_Endzone.mp4``.
        game_key: Game to select.
        play_id: Play to select.

    Returns:
        A copy of the play's tracking rows inside the window, with an added
        ``impact`` column (0/1). The index is reset and the previous index
        is kept in an ``index`` column.

    Raises:
        ValueError: If the play has no ``ball_snap`` event, or the frame
            rate of the Endzone video cannot be read.
    """
    # Work on a copy so adding the "impact" column never writes into (a
    # view of) the caller's frame.
    play_track = train_track.query(f"gameKey == {game_key} & playID == {play_id}").copy()
    play_label = train_label.query(f"gameKey == {game_key} & playID == {play_id}")

    play_track["impact"] = 0

    snap_rows = play_track.query("event == 'ball_snap'")
    if snap_rows.empty:
        raise ValueError(
            f"no 'ball_snap' event for gameKey={game_key}, playID={play_id}")
    # Start the window 0.1 s before the snap.
    snap_time = snap_rows["time"].iloc[0] - dt.timedelta(seconds=0.1)

    video_name = f"{game_key}_{str(play_id).rjust(6, '0')}_Endzone.mp4"
    video = cv2.VideoCapture(str(video_dir / video_name))
    try:
        fps = video.get(cv2.CAP_PROP_FPS)
    finally:
        # Only the frame rate is needed; free the capture handle right away.
        video.release()
    if not fps > 0:
        # A capture that failed to open reports 0 here; fail with a clear
        # message instead of dividing by zero below.
        raise ValueError(
            f"could not read the frame rate of {video_dir / video_name}")

    nframes = play_label.frame.nunique()

    duration = nframes / fps
    end_time = snap_time + dt.timedelta(seconds=duration)

    in_window = (play_track["time"] >= snap_time) & (play_track["time"] < end_time)
    play = play_track.loc[in_window].copy()

    impact_frames = play_label.query("impact == 1 & view == 'Endzone'")
    for _, row in impact_frames.iterrows():
        # Restrict to the labelled player first, then take that player's
        # closest sample; a player without rows in the window is skipped.
        player_rows = play.loc[play["player"] == row.label]
        if player_rows.empty:
            continue
        impact_time = snap_time + dt.timedelta(seconds=row.frame / fps)
        impact_point_index = (player_rows["time"] - impact_time).abs().idxmin()
        play.loc[impact_point_index, "impact"] = 1
    play = play.reset_index(drop=False)
    return play

In [None]:
# Every (gameKey, playID) combination present in the tracking data.
pairs = track_data.groupby(["gameKey", "playID"]).size().index.tolist()
video_dir = Path("../input/nfl-impact-detection/train/")

# Align each play separately, then stack the per-play frames.
play_trackings = []
for game_key, play_id in pairs:
    play_trackings.append(
        make_alignment(track_data, train_labels, video_dir, game_key, play_id)
    )

annotated_trackings = pd.concat(play_trackings).reset_index(drop=True)
annotated_trackings.head(10)

In [None]:
# Tracking rows flagged as impacts for game 57583, play 82.
annotated_trackings.query("impact == 1 & gameKey == 57583 & playID == 82")

In [None]:
# Endzone impact labels of the same play, for comparison with the annotated tracking rows.
train_labels.query("impact == 1 & gameKey == 57583 & playID == 82 & view == 'Endzone'")

In [None]:
# (flagged tracking rows, Endzone impact labels) for game 57583, play 82.
(
    len(annotated_trackings.query("impact == 1 & gameKey == 57583 & playID == 82")),
    len(train_labels.query("impact == 1 & gameKey == 57583 & playID == 82 & view == 'Endzone'")),
)

In [None]:
# Write the annotated tracking data to the working directory, without the row index.
annotated_trackings.to_csv("train_player_tracking_annotated.csv", index=False)

## Animation

In [None]:
def create_animation(play_track: pd.DataFrame, video_dir: Path, save_dir: Path):
    """Render one play's tracking data as an mp4 animation.

    The animated window starts 0.1 seconds before the play's first
    ``ball_snap`` event and lasts as long as the play's Endzone video
    (frames that can be read / reported frame rate). One animation frame is
    drawn per distinct tracking timestamp, 100 ms apart. If ``play_track``
    has an ``impact`` column, rows with a non-zero value are drawn in red.

    Args:
        play_track: Tracking rows of a single play, with at least the
            columns gameKey, playID, time (datetime), event, player, x, y
            and color; ``impact`` is optional.
        video_dir: Directory holding ``{gameKey}_{playID:06d}_Endzone.mp4``.
        save_dir: Directory that receives
            ``{gameKey}_{playID:06d}_Tracking.mp4``.

    Raises:
        ValueError: If the play has no ``ball_snap`` event, the video's
            frame rate cannot be read, or no tracking rows fall inside the
            video window.
    """
    snap_rows = play_track.query("event == 'ball_snap'")
    if snap_rows.empty:
        raise ValueError("play_track contains no 'ball_snap' event")
    # Start the window 0.1 s before the snap.
    snap_time = snap_rows["time"].iloc[0] - dt.timedelta(seconds=0.1)

    game_key = play_track["gameKey"].iloc[0]
    play_id = play_track["playID"].iloc[0]

    video_name = f"{game_key}_{str(play_id).rjust(6, '0')}_Endzone.mp4"
    video = cv2.VideoCapture(str(video_dir / video_name))
    try:
        fps = video.get(cv2.CAP_PROP_FPS)
        # Count the frames by actually reading them.
        nframes = 0
        while True:
            worked, _ = video.read()
            if not worked:
                break
            nframes += 1
    finally:
        # Always free the capture handle; this function runs once per play.
        video.release()
    if not fps > 0:
        # A capture that failed to open reports 0 here.
        raise ValueError(
            f"could not read the frame rate of {video_dir / video_name}")

    duration = nframes / fps
    end_time = snap_time + dt.timedelta(seconds=duration)

    play = play_track.loc[(play_track["time"] >= snap_time) & (play_track["time"] < end_time)]

    unique_times = play.time.unique()
    if len(unique_times) == 0:
        raise ValueError(
            f"no tracking rows inside the video window of {video_name}")

    show_impact_marker = "impact" in play_track.columns

    # The figure is created only after all inputs are validated, so a bad
    # play does not leave an open figure behind.
    fig, ax = create_football_field(figsize=(24, 12.66))

    points = {}
    annotations = {}
    obj_list = []

    def marker_color(row):
        # Red marks an impact row; otherwise use the row's own color.
        if show_impact_marker and row.impact != 0:
            return "red"
        return row.color

    def add_player(row):
        # Create the marker and the label of a player seen for the first time.
        color = marker_color(row)
        plot_obj = ax.scatter(row.x, row.y, color=color, s=70)
        anno_obj = ax.annotate(row.player,
                               (row.x, row.y),
                               verticalalignment="center",
                               horizontalalignment="center",
                               color="white" if color == "black" else "black",
                               fontsize=10)
        points[row.player] = plot_obj
        annotations[row.player] = anno_obj
        obj_list.append(plot_obj)
        obj_list.append(anno_obj)

    # initialize the plot
    start_time = unique_times[0]
    for _, row in play[play["time"] == start_time].iterrows():
        add_player(row)

    def init():
        return obj_list

    def update(step: int):
        time = unique_times[step]
        tracking_at_that_moment = play[play["time"] == time]
        for _, row in tracking_at_that_moment.iterrows():
            player_id = row.player
            if player_id not in points:
                # Player has no row at the first timestamp: add the
                # artists now instead of failing on a missing key.
                add_player(row)
                continue
            points[player_id].set_offsets(np.array([row.x, row.y]))
            if show_impact_marker:
                points[player_id].set_color(marker_color(row))
            annotations[player_id].set_x(row.x)
            annotations[player_id].set_y(row.y)
        return obj_list

    try:
        ani = animation.FuncAnimation(
            fig, update, frames=len(unique_times), interval=100, init_func=init)
        ani.save(save_dir / f"{game_key}_{str(play_id).rjust(6, '0')}_Tracking.mp4")
    finally:
        # Close this specific figure, even when saving fails.
        plt.close(fig)

In [None]:
# Input and output locations for the single-play demo.
video_dir = Path("../input/nfl-impact-detection/train/")
save_dir = Path("./")

# Use the play that the first tracking row belongs to.
game_key = track_data.at[0, "gameKey"]
play_id = track_data.at[0, "playID"]
play_track = track_data.query(f"gameKey == {game_key} & playID == {play_id}")

In [None]:
# Render the selected play, then list the working directory (IPython shell escape).
create_animation(play_track, video_dir, save_dir)
!ls

In [None]:
# Embed the tracking animation in the notebook output.
Video(data="./57583_000082_Tracking.mp4", embed=True)

In version 4, I also created animations with impact markers.

In [None]:
# Same play, taken from the annotated data: its "impact" column makes create_animation color impact rows red.
play_track = annotated_trackings.query(f"gameKey == {game_key} & playID == {play_id}")
# Writes to the same save_dir and file name pattern as the previous call.
create_animation(play_track, video_dir, save_dir)
Video(data="./57583_000082_Tracking.mp4", embed=True)

Red markers correspond to impact events.

This animation corresponds to the play of `57583_000082_Endzone.mp4` and `57583_000082_Sideline.mp4`. Let's check it out.

In [None]:
# Endzone camera view of the same play.
Video(data="../input/nfl-impact-detection/train/57583_000082_Endzone.mp4", embed=True)

In [None]:
# Sideline camera view of the same play.
Video(data="../input/nfl-impact-detection/train/57583_000082_Sideline.mp4", embed=True)

## Create this animation for all the train set

In [None]:
# Output directory for the plain train animations; created if missing, along with any parents.
save_dir = Path("./train_tracking")
save_dir.mkdir(exist_ok=True, parents=True)

In [None]:
# Every (gameKey, playID) combination in the train tracking data; show the first five.
pairs = track_data.groupby(["gameKey", "playID"]).size().index.tolist()
pairs[:5]

In [None]:
# One animation per train play, with a progress bar.
for game_key, play_id in tqdm(pairs):
    play_track = track_data.query(f"gameKey == {game_key} & playID == {play_id}")
    create_animation(play_track, video_dir, save_dir)

In [None]:
# Separate output directory so these files do not overwrite the plain animations.
save_dir = Path("./train_tracking_with_impact_marker")
save_dir.mkdir(exist_ok=True, parents=True)
# Same loop over the annotated data, whose "impact" column turns impact rows red.
for game_key, play_id in tqdm(pairs):
    play_track = annotated_trackings.query(f"gameKey == {game_key} & playID == {play_id}")
    create_animation(play_track, video_dir, save_dir)

## Create this animation for all the test set

In [None]:
# Test tracking data, prepared the same way as the train tracking data.
test_track_data = pd.read_csv("../input/nfl-impact-detection/test_player_tracking.csv")
test_track_data["time"] = pd.to_datetime(test_track_data["time"])
# Marker color per row: "black" when the player label contains "H", otherwise "white".
test_track_data["color"] = [
    "black" if "H" in player else "white"
    for player in test_track_data["player"]
]

# Output directory for the test animations.
save_dir = Path("./test_tracking")
save_dir.mkdir(exist_ok=True, parents=True)

# Every (gameKey, playID) combination in the test tracking data.
pairs = test_track_data.groupby(["gameKey", "playID"]).size().index.tolist()

In [None]:
# Test videos live in a different directory than the train videos.
video_dir = Path("../input/nfl-impact-detection/test")
# One animation per test play; the test data has no "impact" column, so no red markers are drawn.
for game_key, play_id in tqdm(pairs):
    play_track = test_track_data.query(f"gameKey == {game_key} & playID == {play_id}")
    create_animation(play_track, video_dir, save_dir)

## EOF