# NFL - 4th Down EDA

In [4]:
import pandas as pd
import glob

# Directory (relative to this notebook) that is searched for the raw
# "pbp_*.parquet" files in the data-load cell.
RAW_DATA_PATH = "../data/raw/"

# Lift pandas' display truncation: a value of None removes the cap, so every
# row and every column of a displayed frame is rendered.
for _display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(_display_option, None)

### Data load

In [None]:
# Load play-by-play per season data.
# glob returns matches in arbitrary (filesystem-dependent) order, so the paths
# are sorted to make the row order of the combined frame reproducible.
pbp_files = sorted(glob.glob(RAW_DATA_PATH + "pbp_*.parquet"))
if not pbp_files:
    # Fail with an actionable message instead of pd.concat's generic
    # "No objects to concatenate" error.
    raise FileNotFoundError(
        f"No files matching 'pbp_*.parquet' found in {RAW_DATA_PATH!r}"
    )

# Read every file and stack the frames into a single DataFrame with a fresh
# 0..n-1 index. The list of frames is kept anonymous so that `pbp` only ever
# refers to the combined DataFrame.
pbp = pd.concat(
    [pd.read_parquet(path) for path in pbp_files],
    ignore_index=True,
)

# Season is taken from the first four characters of game_id, cast to int.
# NOTE(review): assumes game_id always starts with the 4-digit season year, and
# overwrites any 'season' column already present in the files — confirm.
pbp['season'] = pbp['game_id'].str[:4].astype(int)

print(f"Columns in the dataset: {pbp.columns.tolist()}")
print(f"Number of seasons: {pbp['season'].nunique()}")
print(f"Number of rows: {len(pbp)}")


Columns in the dataset: ['play_id', 'game_id', 'old_game_id', 'home_team', 'away_team', 'season_type', 'week', 'posteam', 'posteam_type', 'defteam', 'side_of_field', 'yardline_100', 'game_date', 'quarter_seconds_remaining', 'half_seconds_remaining', 'game_seconds_remaining', 'game_half', 'quarter_end', 'drive', 'sp', 'qtr', 'down', 'goal_to_go', 'time', 'yrdln', 'ydstogo', 'ydsnet', 'desc', 'play_type', 'yards_gained', 'shotgun', 'no_huddle', 'qb_dropback', 'qb_kneel', 'qb_spike', 'qb_scramble', 'pass_length', 'pass_location', 'air_yards', 'yards_after_catch', 'run_location', 'run_gap', 'field_goal_result', 'kick_distance', 'extra_point_result', 'two_point_conv_result', 'home_timeouts_remaining', 'away_timeouts_remaining', 'timeout', 'timeout_team', 'td_team', 'td_player_name', 'td_player_id', 'posteam_timeouts_remaining', 'defteam_timeouts_remaining', 'total_home_score', 'total_away_score', 'posteam_score', 'defteam_score', 'score_differential', 'posteam_score_post', 'defteam_score_po

### Feature classification

In [6]:
# Column-name groups used to classify the play-by-play features.
# NOTE(review): the per-group descriptions below are inferred from the list and
# column names only; not every name is guaranteed to exist in the loaded frame,
# so intersect with pbp.columns before indexing — confirm against the data.

# Play / game identifiers and scheduling fields.
ID_COLS = [
    "play_id", "game_id", "old_game_id", "nfl_api_id", "nflverse_game_id",
    "season", "season_type", "week",
    "game_date", "start_time", "time_of_day",
]

# Teams involved and field side.
TEAM_COLS = [
    "home_team", "away_team",
    "posteam", "defteam", "possession_team",
    "posteam_type", "side_of_field",
]

# Game state at the snap: clock, down and distance, score margin, timeouts.
STATE_COLS = [
    "qtr",
    "quarter_seconds_remaining", "half_seconds_remaining", "game_seconds_remaining",
    "game_half",
    "down", "ydstogo", "yardline_100", "goal_to_go", "goaltogo",
    "score_differential",
    "home_timeouts_remaining", "away_timeouts_remaining",
    "posteam_timeouts_remaining", "defteam_timeouts_remaining",
    "play_clock",
]

# Venue and weather.
ENV_COLS = [
    "roof", "surface", "temp", "wind", "weather",
    "stadium_id", "game_stadium", "stadium", "location",
]

# What was called on the play and what happened on it.
DECISION_COLS = [
    "play_type", "play_type_nfl", "special_teams_play", "st_play_type",
    "field_goal_attempt", "field_goal_result", "kick_distance",
    "punt_attempt", "punt_blocked", "punt_inside_twenty", "punt_in_endzone",
    "punt_out_of_bounds", "punt_downed", "punt_fair_catch",
    "rush_attempt", "pass_attempt", "sack",
    "yards_gained",
    "first_down", "first_down_rush", "first_down_pass", "first_down_penalty",
    "fourth_down_converted", "fourth_down_failed",
    "aborted_play", "penalty", "play_deleted",
    "desc", "time", "yrdln",
    "end_clock_time", "end_yard_line",
]

# Expected-points / win-probability style outcome measures.
OUTCOME_COLS = [
    "ep", "epa",
    "wp", "wpa",
    "home_wp", "away_wp", "home_wp_post", "away_wp_post",
    "score_differential_post", "success",
]

# Model probability columns.
PROB_COLS = [
    "no_score_prob",
    "opp_fg_prob", "opp_safety_prob", "opp_td_prob",
    "fg_prob", "safety_prob", "td_prob",
    "extra_point_prob", "two_point_conversion_prob",
    "xpass", "pass_oe",
]

# Drive- and series-level descriptors.
DRIVE_COLS = [
    "drive", "fixed_drive", "fixed_drive_result",
    "drive_play_count", "drive_time_of_possession",
    "drive_first_downs", "drive_inside20", "drive_ended_with_score",
    "drive_quarter_start", "drive_quarter_end",
    "drive_yards_penalized",
    "drive_start_transition", "drive_end_transition",
    "drive_game_clock_start", "drive_game_clock_end",
    "drive_start_yard_line", "drive_end_yard_line",
    "series", "series_success", "series_result",
    "order_sequence",
]

# Running and final scores.
SCORE_COLS = [
    "total_home_score", "total_away_score",
    "posteam_score", "defteam_score",
    "posteam_score_post", "defteam_score_post",
    "away_score", "home_score",
]

# Betting-line related columns.
VEGAS_COLS = [
    "result", "spread_line", "total_line", "total",
    "vegas_wp", "vegas_home_wp", "vegas_wpa", "vegas_home_wpa",
    "div_game",
]

# Formation / quarterback-action flags.
CALL_COLS = [
    "shotgun", "no_huddle",
    "qb_dropback", "qb_spike", "qb_kneel", "qb_scramble",
]
