In [60]:
import pandas as pd
import nfl_data_py as nfl
from datetime import datetime

In [61]:
# First season included in the analysis.
FIRST_SEASON = 2000

# Seasons from FIRST_SEASON up to, but not including, the current calendar year
# (the stop value of `range` is exclusive).
years_to_analyze = range(FIRST_SEASON, datetime.now().year)

In [62]:
# Load the weekly player stat lines from nfl_data_py for every season in
# years_to_analyze.
weekly_data = nfl.import_weekly_data(years=years_to_analyze)

Downcasting floats.


In [63]:
# Load roster data for the 2021 season only; the single year is hard-coded here.
roster_data = nfl.import_rosters(years=[2021])

In [64]:
# Show which columns the roster frame provides.
roster_data.columns

Index(['season', 'team', 'position', 'depth_chart_position', 'jersey_number',
       'status', 'player_name', 'first_name', 'last_name', 'birth_date',
       'height', 'weight', 'college', 'player_id', 'espn_id', 'sportradar_id',
       'yahoo_id', 'rotowire_id', 'pff_id', 'pfr_id', 'fantasy_data_id',
       'sleeper_id', 'years_exp', 'headshot_url', 'ngs_position', 'esb_id',
       'gsis_it_id', 'smart_id', 'entry_year', 'rookie_year'],
      dtype='object')

In [65]:
# Collapse the weekly stats to one row per (player_id, season, week).
# numeric_only=True restricts the aggregation to numeric stat columns on every
# pandas version: since pandas 2.0 the default is False, so text columns would
# otherwise be string-concatenated (or raise) instead of being left out.
summed = (
    weekly_data
    .groupby(['player_id', 'season', 'week'])
    .sum(numeric_only=True)
    .reset_index()
)
summed.head()

Unnamed: 0,player_id,season,week,completions,attempts,passing_yards,passing_tds,interceptions,sacks,sack_yards,...,receiving_epa,receiving_2pt_conversions,special_teams_tds,fantasy_points,fantasy_points_ppr,pacr,racr,target_share,air_yards_share,wopr
0,00-0000003,2000,1,0,0,0.0,0,0.0,0.0,0.0,...,0.0,0,0.0,-0.2,-0.2,0.0,0.0,0.0,0.0,0.0
1,00-0000007,2000,1,0,0,0.0,0,0.0,0.0,0.0,...,-0.31212,0,0.0,0.3,1.3,0.0,0.0,0.0,0.0,0.0
2,00-0000007,2000,12,0,0,0.0,0,0.0,0.0,0.0,...,0.0,0,0.0,1.8,1.8,0.0,0.0,0.0,0.0,0.0
3,00-0000007,2000,14,0,0,0.0,0,0.0,0.0,0.0,...,0.609493,0,0.0,4.9,5.9,0.0,0.0,0.0,0.0,0.0
4,00-0000007,2000,15,0,0,0.0,0,0.0,0.0,0.0,...,0.0,0,0.0,1.4,1.4,0.0,0.0,0.0,0.0,0.0


In [134]:
def convert_height_to_inches(height_string: str) -> int:
    """
    Turn a "feet-inches" height string into a total number of inches.

    Arguments:
        height_string (str): Player height written as feet-inches, e.g. "6-2".

    Returns:
        int: Player height in inches.

    Raises:
        ValueError: If the string does not split into exactly two integer
            parts around a single '-'.

    """

    # Unpacking enforces exactly one '-' separator before any conversion happens.
    feet_part, inches_part = height_string.split('-')
    return int(feet_part) * 12 + int(inches_part)

In [205]:
# Attach roster attributes to each weekly stat line; the inner join keeps only
# players present in both frames.
data = pd.merge(left=summed, right=roster_data, on='player_id', how='inner')

# Stat, identifier and biographical columns that are not carried forward.
columns_to_drop = [
    'sacks',
    'sack_yards',
    'sack_fumbles',
    'espn_id',
    'sportradar_id',
    'yahoo_id',
    'rotowire_id',
    'pff_id',
    'pfr_id',
    'fantasy_data_id',
    'sleeper_id',
    'esb_id',
    'gsis_it_id',
    'smart_id',
    'ngs_position',
    'entry_year',
    'player_name',
    'birth_date',
    'jersey_number',
    'special_teams_tds',
    'college',
]
# Non-key columns present in both frames come out of the merge suffixed `_x`
# (left) and `_y` (right); discard the right-hand (roster) copies.
columns_to_drop.extend(column for column in data.columns if column.endswith('_y'))
data = data.drop(columns=columns_to_drop)

# Give the surviving left-hand copies their unsuffixed names back.
rename_column_mapper = {
    column: column[:-2] for column in data.columns if column.endswith('_x')
}
data = data.rename(columns=rename_column_mapper)

# Keep only the WR, RB, TE and QB rows. The explicit copy makes the filtered
# frame independent of the merged one, so the column assignments below do not
# raise SettingWithCopyWarning.
data = data[data['position'].isin(['WR', 'RB', 'TE', 'QB'])].copy()

# Overwrite the roster's years_exp with experience relative to each row's season.
data['years_exp'] = data['season'].astype(int) - data['rookie_year'].astype(int)
data['height'] = data['height'].apply(convert_height_to_inches)
data = data.drop(columns='weight')
data['first_name'] = data['first_name'].str.lower()
data['last_name'] = data['last_name'].str.lower()

# Any week with positive fantasy points (standard or PPR) is marked Active,
# whatever status the roster reports.
scored_points = (data['fantasy_points'] > 0) | (data['fantasy_points_ppr'] > 0)
data.loc[scored_points, 'status'] = 'Active'

# Drop weeks with non-positive standard points whose status is still not Active,
# then discard the helper status column.
rows_to_drop = data.index[(data['fantasy_points'] <= 0) & (data['status'] != 'Active')]
data = data.drop(index=rows_to_drop).drop(columns='status')
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 28403 entries, 0 to 29263
Data columns (total 50 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   player_id                    28403 non-null  object 
 1   season                       28403 non-null  int64  
 2   week                         28403 non-null  int64  
 3   completions                  28403 non-null  int32  
 4   attempts                     28403 non-null  int32  
 5   passing_yards                28403 non-null  float32
 6   passing_tds                  28403 non-null  int32  
 7   interceptions                28403 non-null  float32
 8   sack_fumbles_lost            28403 non-null  int32  
 9   passing_air_yards            28403 non-null  float32
 10  passing_yards_after_catch    28403 non-null  float32
 11  passing_first_downs          28403 non-null  float32
 12  passing_epa                  28403 non-null  float32
 13  passing_2pt_conv