In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
DIR = "/workspace/nflbigdatabowl2023"
sys.path.append(DIR)

In [3]:
import math
from ast import literal_eval

import pandas as pd
import matplotlib.pyplot as plt

from src.pipeline.flows.main import main_flow
from src.visualization.interactive_play_selector import create_interactive_play_selector

In [4]:
_ = main_flow(max_games=3, max_plays=3)

  next(self.gen)
  next(self.gen)


In [6]:
df_plays_all = pd.read_csv(f"{DIR}/data/raw/plays.csv")
df_tracking_display_all = pd.read_csv(f"{DIR}/data/outputs/tracking_display.csv")
df_areas_all = pd.read_csv(f"{DIR}/data/outputs/pocket_areas.csv")
df_areas_all["pocket"] = df_areas_all["pocket"].apply(literal_eval)

In [7]:
def average_pocket_change(df_areas, df_tracking):
    df_areasXtracking = df_areas.merge(df_tracking[['gameId', 'playId', 'frameId', 'event', 'pff_role']]\
                                       .query("pff_role == 'Pass'"), on = ['gameId', 'playId', 'frameId'], how = 'left')
    df_areasXtracking_grouped = df_areasXtracking.groupby(['gameId', 'playId']).mean().reset_index()
    for index, row in df_areasXtracking_grouped.iterrows():
        df_ball_snap = df_areasXtracking.query("gameId == " + str(row['gameId']) + \
                                      " and playId ==" + str(row['playId']) + "and event == 'ball_snap'")
        df_end_frame = df_areasXtracking.query("gameId == " + str(row['gameId']) + \
                                      " and playId ==" + str(row['playId']) + \
                                                "and frameId == {}".format(df_ball_snap['frameId'].values[0] + 20))
        
        change_in_area_pocket_radius = df_end_frame['area'].values[0] - df_ball_snap['area'].values[0]
        change_in_area_corvex_hull = df_end_frame['area'].values[1] - df_ball_snap['area'].values[1]

        change_in_frames = df_end_frame['frameId'].values[0] - df_ball_snap['frameId'].values[0]
        average_pr = change_in_area_pocket_radius / (change_in_frames/10)
        average_ch = change_in_area_corvex_hull / (change_in_frames/10)
        
        print(average_pr, average_ch)

In [8]:
"""
Pseudocode:

- 4. Calculate average pocket loss per second
- I need a dataset that gives me: gameId, playId, method, window_type, pocket_area_start, pocket_area_end, time_start, time_end
- (variation can be `after_snap` or `before_pass`)

- 3. Create a dataset that has the above columns
- I need to filter a dataset to only rows that are in the time frame

- 2. Create a dataset filtered to only rows that are in the time frame
- I need to join tracking to area to events

- 1. Create a dataset that has the events for each frame
- I need tracking data

List of flow of tasks:

A. Clean event data (autoevent and event adjustment)
B1. Filter frame data to only X seconds after snap (depends on A)
B2. Filter frame data to only X seconds before pass (depends on A)
	X will be a variable in the function
C. Union frame data with different window types (depends on B1, B2)

D1. Calculate pocket area for passer radius
D2. Calculate pocket area for blocker convex hull
D3. Calculate pocket area for rusher convex hull
E. Union pocket area data with different methods (depends D1, D2, D3)

F. Join frame data with different window types to pocket area (depends on C, E)
G. Calculate play pcoket metrics (depends on F)
H. Calculate average pocket area loss per second (depends on H)

Diagram of flow of tasks:

  A
 / \
B1 B2  D1 D2 D3
 \ /     \ | /
  C        E
   \______/
   |
   F
   |
   G
   |
   H

"""


import pandas as pd

def get_average_pocket_area_loss_per_second(area_start, area_end, time_start, time_end):
    time_delta = time_end - time_start
    if time_delta == 0:
        return 0
  
    area_delta = area_end - area_start
    return area_delta / time_delta

def calculate_average_pocket_area_loss_per_second(
  df_play_pocket_metrics: pd.DataFrame
) -> pd.DataFrame:
    """
    Parameters:
    df_play_pocket_metrics: DataFrame for every play, pocket area method,
        and window type, with metrics related to pocket area.
      Contains columns:
        - gameId (PK)
        - playId (PK)
        - method (PK)
        - window_type (PK)
        - pocket_area_start
        - pocket_area_end
        - time_start
        - time_end

    Returns:
        DataFrame with pocket are loss per second for each primary key.
    Contains columns:
        - gameId (PK)
        - playId (PK)
        - method (PK)
        - window_type (PK)
        - average_pocket_loss_per_second
    """
    df_metric = pd.DataFrame(df_play_pocket_metrics)
    df_metric["average_pocket_area_loss_per_second"] = df_metric.apply(
        lambda df: get_average_pocket_area_loss_per_second(
            area_start=df["area_start"],
            area_end=df["area_end"],
            time_start=df["time_start"],
            time_end=df["time_end"],
        ),
        axis=1
    )
    return df_metric

def get_play_pocket_metrics(df_area: pd.DataFrame) -> pd.DataFrame:
    """
    Parameters:
    df_area:
      Contains columns:
        - gameId (PK)
        - playId (PK)
        - frameId (PK)
        - method (PK)
        - window_type (PK)
        - area

    The input can contain as many types of time window as needed.
    For example:
    - window_type = `x_after_snap`:
      - Already filtered out frames before the snap.
        - Already filtered out any plays that end less than X seconds after snap.
    - window_type = `x_before_pass`:
      - Already filtered out frames earlier than X seconds before pass.
      - Already filtered out frames after pass.
        - Already filtered out any plays where the pass is less than X seconds after the snapp

    Returns:
    Contains columns:
        - gameId (PK)
        - playId (PK)
        - method (PK)
        - window_type (PK)
        - pocket_area_start
        - pocket_area_end
        - time_start
        - time_end
    """
    # Copy input and add columns to mark start and end of time window.
    df = pd.DataFrame(df_area)
    # Find the first and last frame of each play.
    play_keys = ["gameId", "playId", "method", "window_type"]
    aggregations = {
        "min": ("frameId", min),
        "max": ("frameId", max),
        }
    df_time_window = df.groupby(play_keys).agg(**aggregations).reset_index()
    """
    df_time_window =
    gameId	playId		method	window_type		min		max
    1				1					A				after_snap		5			25
    1				2					A				after_snap		2			22
    1				1					B				after_snap		5			25
    1				2					B				after_snap		2			22
    1				1					A				before_pass		...
    ...
    """
    # Join in the start pocket area for each play.
    # Note: The join should explode if there are multiple `method` rows, so that we get each of them.
    # Note: The join columns have different names on each side.
    df_with_start = df_time_window.merge(
        df,
        left_on=(play_keys + ["min"]),
        right_on=(play_keys + ["frameId"]),
        how="left"
    ).rename(columns = {"area": "area_start"})
    # Join in the end pocket area for each play.
    df_with_both = df_with_start.merge(
        df,
        left_on=(play_keys + ["max"]),
        right_on=(play_keys + ["frameId"]),
        how="left"
    ).rename(columns = {"area": "area_end"}).drop(columns = ['frameId_x', 'frameId_y'])
    """
    gameId	playId		method		window_type		min		max		area_start	area_end
    1				1					A					after_snap		5			25		100					80
    1				2					A					after_snap		2			22		120					95
    1				1					B					after_snap		5			25		20					12
    1				2					B					after_snap		2			22		23					20
    1				1					A					before_pass		...
    ...
    """
    frames_per_second = 10.0
    df_with_both["time_start"] = df_with_both["min"].astype(float) / frames_per_second
    df_with_both["time_end"] = df_with_both["max"].astype(float) / frames_per_second
    return df_with_both


df_area_with_window = pd.DataFrame([
    # Frames inside window, but not outside window.
    {"gameId": 1, "playId": 1, "frameId": 5, "method": "A", "window_type": "after_snap", "area": 100},
    {"gameId": 1, "playId": 1, "frameId": 6, "method": "A", "window_type": "after_snap", "area": 120},
    {"gameId": 1, "playId": 1, "frameId": 25, "method": "A", "window_type": "after_snap", "area": 80},

    # Multiple pocket area methods.
    {"gameId": 1, "playId": 1, "frameId": 5, "method": "B", "window_type": "after_snap", "area": 20},
    {"gameId": 1, "playId": 1, "frameId": 6, "method": "B", "window_type": "after_snap", "area": 30},
    {"gameId": 1, "playId": 1, "frameId": 25, "method": "B", "window_type": "after_snap", "area": 12},

    # Multiple window types.
    {"gameId": 1, "playId": 1, "frameId": 12, "method": "A", "window_type": "before_pass", "area": 110},
    {"gameId": 1, "playId": 1, "frameId": 32, "method": "A", "window_type": "before_pass", "area": 75},
    {"gameId": 1, "playId": 1, "frameId": 12, "method": "B", "window_type": "before_pass", "area": 15},
    {"gameId": 1, "playId": 1, "frameId": 32, "method": "B", "window_type": "before_pass", "area": 8},

    # Multiple plays.
    {"gameId": 2, "playId": 2, "frameId": 5, "method": "A", "window_type": "after_snap", "area": 110},
    {"gameId": 2, "playId": 2, "frameId": 6, "method": "A", "window_type": "after_snap", "area": 130},
    {"gameId": 2, "playId": 2, "frameId": 25, "method": "A", "window_type": "after_snap", "area": 85},
    {"gameId": 2, "playId": 2, "frameId": 5, "method": "B", "window_type": "after_snap", "area": 25},
    {"gameId": 2, "playId": 2, "frameId": 6, "method": "B", "window_type": "after_snap", "area": 35},
    {"gameId": 2, "playId": 2, "frameId": 25, "method": "B", "window_type": "after_snap", "area": 17},
    {"gameId": 2, "playId": 2, "frameId": 12, "method": "A", "window_type": "before_pass", "area": 115},
    {"gameId": 2, "playId": 2, "frameId": 32, "method": "A", "window_type": "before_pass", "area": 80},
    {"gameId": 2, "playId": 2, "frameId": 12, "method": "B", "window_type": "before_pass", "area": 18},
    {"gameId": 2, "playId": 2, "frameId": 32, "method": "B", "window_type": "before_pass", "area": 12},
])
df_play_pocket_metrics = get_play_pocket_metrics(df_area_with_window)
get_play_pocket_metrics(df_area_with_window)
df_area_with_window

Unnamed: 0,gameId,playId,frameId,method,window_type,area
0,1,1,5,A,after_snap,100
1,1,1,6,A,after_snap,120
2,1,1,25,A,after_snap,80
3,1,1,5,B,after_snap,20
4,1,1,6,B,after_snap,30
5,1,1,25,B,after_snap,12
6,1,1,12,A,before_pass,110
7,1,1,32,A,before_pass,75
8,1,1,12,B,before_pass,15
9,1,1,32,B,before_pass,8


In [12]:
DIR = "/workspace/nflbigdatabowl2023"
df_events = pd.read_csv(f"{DIR}/data/outputs/events.csv")
df_areas = pd.read_csv(f"{DIR}/data/outputs/pocket_areas.csv")
df = pd.DataFrame(df_events)
df["window_type"] = "after_pass"
df_events.query("event == 'ball_snap'")
df.merge(df_areas.drop(columns = ["pocket"]), how = "left").head(10)

Unnamed: 0,gameId,playId,frameId,event,frame_start,frame_end,passer_out_of_pocket,eligible_for_pocket,window_type,method,area
0,2021090900,97,1,,6,38,False,False,after_pass,passer_radius,104.729389
1,2021090900,97,1,,6,38,False,False,after_pass,blocker_convex_hull,12.4402
2,2021090900,97,2,,6,38,False,False,after_pass,passer_radius,104.014363
3,2021090900,97,2,,6,38,False,False,after_pass,blocker_convex_hull,12.35715
4,2021090900,97,3,,6,38,False,False,after_pass,passer_radius,103.956872
5,2021090900,97,3,,6,38,False,False,after_pass,blocker_convex_hull,12.353
6,2021090900,97,4,,6,38,False,False,after_pass,passer_radius,104.917257
7,2021090900,97,4,,6,38,False,False,after_pass,blocker_convex_hull,12.53245
8,2021090900,97,5,,6,38,False,False,after_pass,passer_radius,105.332575
9,2021090900,97,5,,6,38,False,False,after_pass,blocker_convex_hull,12.7102


In [20]:
def get_frames_with_area_and_filter(
    df_events: pd.DataFrame,
    seconds_factor: float) -> pd.DataFrame:
    #This method will apply all the area algorithms to the dataframe and filter out plays
    """
    Parameters:
    df_events:
      Contains columns:
        - gameId (PK)
        - playId (PK)
        - frameId (PK)
        - event
        - frame_start (snap of the ball)
        - frame_end (pass, sack, etc)
        - passer_out_of_pocket
        - elgible_for_pocket
    
    second_factor:
        -X seconds factor after snap/before snap

    The input can contain as many types of time window as needed.
    For example:
    - window_type = `x_after_snap`:
      - Already filtered out frames before the snap.
        - Already filtered out any plays that end less than X seconds after snap.
    - window_type = `x_before_pass`:
      - Already filtered out frames earlier than X seconds before pass.
      - Already filtered out frames after pass.
        - Already filtered out any plays where the pass is less than X seconds after the snapp

    Returns:
    Contains columns:
        - gameId (PK)
        - playId (PK)
        - frameId (PK)
        - method (PK)
        - window_type (PK)
        - area
    """
    #df_areas possibly put as a parameter for this method??
    df_areas = pd.read_csv(f"{DIR}/data/outputs/pocket_areas.csv")
    
    df = pd.DataFrame(df_events)
    
    #10 frames per second
    frames_factor = seconds_factor / 10
    
    #filter 1:
    #getting accurate after snap frames and declaring frame as "after_snap" window type
    df_after_snap = df.query("frameId >= {first_frame} and frameId <= {second_frame}".format(first_frame = df["frame_start"] + frames_factor, second_frame = df["frame_end"]))
    df_after_snap["window_type"] = "after_snap"
    
    #getting accurate before snap frames and declaring frame as "before_pass" window type    
    df_before_pass = df.query("frameId >= {first_frame} and frameId <= {second_frame}".format(first_frame = df["frame_end"] - frames_factor, second_frame = df["frame_end"]))
    df_before_pass["window_type"] = "before_pass"
    
    #merging after_snap and before_pass with area dataframe
    merged_after_snap_with_area = df_after_snap.merge(df_areas.drop(columns = ["pocket"]), how = "left")
    merged_before_pass_with_area = df_before_pass.merge(df_areas.drop(columns = ["pocket"]), how = "left")
    #Combining the two merged dataframes
    prefiltered_returned_frames = pd.concat([merged_after_snap_with_area, merged_before_pass_with_area])
    
    #filter 2: making sure all the frames are elgible to be a pocket
    returned_frames = prefiltered_returned_frames.query("eligible_for_pocket == True")
    
    return returned_frames

In [21]:
get_frames_with_area_and_filter(df_events, 0.0)

ValueError: multi-line expressions are only valid in the context of data, use DataFrame.eval