In [172]:
import numpy as np
import pandas as pd
import nfl_data_py as nfl
from datetime import datetime

In [173]:
years_to_analyze = range(2013, datetime.now().year)

In [178]:
weekly_data = nfl.import_weekly_data(years=years_to_analyze)
roster_data = nfl.import_rosters(years=years_to_analyze)
snap = nfl.import_snap_counts(years=years_to_analyze)
team_info = nfl.import_team_desc()

Downcasting floats.


In [179]:
def convert_height_to_inches(height_string: str) -> int:
    """
    Converts the height string (Feet - Inches) into inches.
    
    Arguments:
        height_string (str): Player height as a string in the format of height-inches.
        
    Returns:
        int: Player height in inches.
    
    """
        
    height_string_split = height_string.split('-')
    
    feet, inches = height_string_split[0], height_string_split[1]
    feet, inches = int(feet), int(inches)
    height = feet * 12 + inches
    return height

In [182]:
summed = weekly_data.groupby(['player_id', 'season', 'week']).sum().reset_index()

data = pd.merge(
    left=summed, 
    right=roster_data, 
    left_on='player_id', 
    right_on='player_id', 
    how='inner', 
    suffixes=('', '_DROP')
).filter(regex='^(?!.*_DROP)')

columns_to_drop = [
    'sacks',
    'sack_yards',
    'sack_fumbles',
    'espn_id',
    'sportradar_id',
    'yahoo_id',
    'rotowire_id',
    'pff_id',
    'fantasy_data_id',
    'sleeper_id',
    'esb_id',
    'gsis_it_id',
    'smart_id',
    'ngs_position',
    'entry_year',
    'player_name',
    'birth_date',
    'jersey_number',
    'special_teams_tds',
    'college',
    'depth_chart_position',
]                  
data.drop(columns_to_drop, axis=1, inplace=True)
data = data[
    data['position'].isin(['WR', 'RB', 'TE', 'QB'])
]
data = data[~data['rookie_year'].isna()]
data['rookie_year'] = data['rookie_year'].astype(int)
data['years_exp'] = (data['season'].astype(int) - data['rookie_year'].astype(int))
data['height'] = data['height'].apply(lambda height: convert_height_to_inches(height))
data.drop('weight', axis=1, inplace=True)
data['first_name'] = data['first_name'].str.lower()
data['last_name'] = data['last_name'].str.lower()

data.loc[data['fantasy_points'] > 0, 'status'] = 'Active'
data.loc[data['fantasy_points_ppr'] > 0, 'status'] = 'Active'

rows_to_drop = data.loc[
    (data['fantasy_points'] <= 0) & (data['status'] != 'Active'),
    'status'
].index
data.drop(rows_to_drop, axis=0, inplace=True)
data.drop('status', axis=1, inplace=True)

snap = snap[snap['position'].isin(data['position'].unique())]
data = pd.merge(
    left=data,
    right=snap,
    left_on=['pfr_id', 'season', 'week'],
    right_on=['pfr_player_id', 'season', 'week'],
    how='inner', 
    suffixes=('', '_DROP')
).filter(regex='^(?!.*_DROP)')
data = data[data['game_type'] == 'REG']

columns_to_drop = [
    'pfr_id',
    'game_id',
    'pfr_game_id',
    'game_type',
    'player',
    'pfr_player_id',
    'defense_snaps',
    'defense_pct',
    'st_snaps',
    'st_pct',
    
]
data.drop(
    columns_to_drop,
    axis=1,
    inplace=True
)

data = pd.merge(
    left=data,
    right=team_info[['team_abbr', 'team_conf', 'team_division']],
    left_on='team',
    right_on='team_abbr',
    how='inner',
)
data = pd.merge(
    left=data,
    right=team_info[['team_abbr', 'team_conf', 'team_division']],
    left_on='opponent',
    right_on='team_abbr',
    how='inner',
).rename(
    columns={
        'team_abbr_y': 'opponent_abbr', 
        'team_conf_y': 'opponent_conf', 
        'team_division_y': 'opponent_division',
        'team_abbr_x': 'team_abbr',
        'team_conf_x': 'team_conf',
        'team_division_x': 'team_division'
    }
)
data.drop(
    ['team_abbr', 'opponent_abbr'],
    axis=1, 
    inplace=True
)

data['division_matchup'] = np.where(
    (data['team_division'] == data['opponent_division']),
    1, 
    0
)

data['conference_matchup'] = np.where(
    (data['team_conf'] == data['opponent_conf']),
    1, 
    0
)

data = pd.merge(
    left=data,
    right=pd.get_dummies(data['team_conf'], prefix='team'),
    left_index=True,
    right_index=True,
    how='inner'
)

data = pd.merge(
    left=data,
    right=pd.get_dummies(data['opponent_conf'], prefix='opponent'),
    left_index=True,
    right_index=True,
    how='inner'
)

data = pd.merge(
    left=data,
    right=pd.get_dummies(data['team_division'], prefix='team'),
    left_index=True,
    right_index=True,
    how='inner'
)

data = pd.merge(
    left=data,
    right=pd.get_dummies(data['opponent_division'], prefix='opponent'),
    left_index=True,
    right_index=True,
    how='inner'
)

data = pd.merge(
    left=data,
    right=pd.get_dummies(data['position']),
    left_index=True,
    right_index=True,
    how='inner'
)

columns_to_drop = [
    'team_conf',
    'team_division',
    'opponent_conf',
    'opponent_division',
    'position',
    'headshot_url',
    
]
data.drop(
    columns_to_drop,
    axis=1,
    inplace=True
)
data.insert(0, 'player_id', data.pop('player_id'))
data.insert(1, 'first_name', data.pop('first_name'))
data.insert(2, 'last_name', data.pop('last_name'))
data.insert(3, 'team', data.pop('team'))
data.insert(4, 'opponent', data.pop('opponent'))

In [190]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 177781 entries, 0 to 177780
Data columns (total 76 columns):
 #   Column                       Non-Null Count   Dtype  
---  ------                       --------------   -----  
 0   player_id                    177781 non-null  object 
 1   first_name                   177781 non-null  object 
 2   last_name                    177781 non-null  object 
 3   team                         177781 non-null  object 
 4   opponent                     177781 non-null  object 
 5   season                       177781 non-null  int64  
 6   week                         177781 non-null  int64  
 7   completions                  177781 non-null  int32  
 8   attempts                     177781 non-null  int32  
 9   passing_yards                177781 non-null  float32
 10  passing_tds                  177781 non-null  int32  
 11  interceptions                177781 non-null  float32
 12  sack_fumbles_lost            177781 non-null  int32  
 13 