In [135]:
import numpy as np
import pandas as pd
import nfl_data_py as nfl
from datetime import datetime

In [136]:
years_to_analyze = range(2013, datetime.now().year)

In [137]:
weekly_data = nfl.import_weekly_data(years=years_to_analyze)
roster_data = nfl.import_rosters(years=years_to_analyze)
snap = nfl.import_snap_counts(years=years_to_analyze)
team_info = nfl.import_team_desc()
inj = nfl.import_injuries(years_to_analyze)

Downcasting floats.


In [138]:
inj = inj[
    inj['position'].isin(['WR', 'RB', 'TE', 'QB'])
]
inj = inj[inj['game_type'] == 'REG']
inj['season'] = inj['season'].astype(int)
inj['week'] = inj['week'].astype(int)
inj['report_primary_injury'] = inj['report_primary_injury'].str.lower()
inj['report_secondary_injury'] = inj['report_secondary_injury'].str.lower()
inj['report_status'] = inj['report_status'].str.lower()
inj['practice_primary_injury'] = inj['practice_primary_injury'].str.lower()
inj['practice_secondary_injury'] = inj['practice_secondary_injury'].str.lower()
inj['practice_status'] = inj['practice_status'].str.lower()

injuries = pd.DataFrame(
    inj.groupby(['season', 'week', 'team', 'position', ])[['report_status', 'practice_status']].value_counts()
).reset_index()

injuries = pd.get_dummies(
    injuries, 
    columns=['position'], 
    prefix='positional_injuries'
).groupby(['season', 'week', 'team']).sum().reset_index()
injuries.drop(0, axis=1, inplace=True)

In [140]:
def convert_height_to_inches(height_string: str) -> int:
    """
    Converts the height string (Feet - Inches) into inches.
    
    Arguments:
        height_string (str): Player height as a string in the format of height-inches.
        
    Returns:
        int: Player height in inches.
    
    """
        
    height_string_split = height_string.split('-')
    
    feet, inches = height_string_split[0], height_string_split[1]
    feet, inches = int(feet), int(inches)
    height = feet * 12 + inches
    return height

In [141]:
summed = weekly_data.groupby(['player_id', 'season', 'week']).sum().reset_index()

data = pd.merge(
    left=summed, 
    right=roster_data, 
    left_on='player_id', 
    right_on='player_id', 
    how='inner', 
    suffixes=('', '_DROP')
).filter(regex='^(?!.*_DROP)')

columns_to_drop = [
    'sacks',
    'sack_yards',
    'sack_fumbles',
    'espn_id',
    'sportradar_id',
    'yahoo_id',
    'rotowire_id',
    'pff_id',
    'fantasy_data_id',
    'sleeper_id',
    'esb_id',
    'gsis_it_id',
    'smart_id',
    'ngs_position',
    'entry_year',
    'player_name',
    'birth_date',
    'jersey_number',
    'special_teams_tds',
    'college',
    'depth_chart_position',
    'fantasy_points',
    'fantasy_points_ppr',
]                  
data.drop(columns_to_drop, axis=1, inplace=True)
data = data[
    data['position'].isin(['WR', 'RB', 'TE', 'QB'])
]
data = data[~data['rookie_year'].isna()]
data['rookie_year'] = data['rookie_year'].astype(int)
data['years_exp'] = (data['season'].astype(int) - data['rookie_year'].astype(int))
data['height'] = data['height'].apply(lambda height: convert_height_to_inches(height))
data.drop('weight', axis=1, inplace=True)
data['first_name'] = data['first_name'].str.lower()
data['last_name'] = data['last_name'].str.lower()

data.loc[data['fantasy_points'] > 0, 'status'] = 'Active'
data.loc[data['fantasy_points_ppr'] > 0, 'status'] = 'Active'

rows_to_drop = data.loc[
    (data['fantasy_points'] <= 0) & (data['status'] != 'Active'),
    'status'
].index
data.drop(rows_to_drop, axis=0, inplace=True)
data.drop('status', axis=1, inplace=True)

snap = snap[snap['position'].isin(data['position'].unique())]
data = pd.merge(
    left=data,
    right=snap,
    left_on=['pfr_id', 'season', 'week'],
    right_on=['pfr_player_id', 'season', 'week'],
    how='inner', 
    suffixes=('', '_DROP')
).filter(regex='^(?!.*_DROP)')
data = data[data['game_type'] == 'REG']

columns_to_drop = [
    'pfr_id',
    'game_id',
    'pfr_game_id',
    'game_type',
    'player',
    'pfr_player_id',
    'defense_snaps',
    'defense_pct',
    'st_snaps',
    'st_pct',
    
]
data.drop(
    columns_to_drop,
    axis=1,
    inplace=True
)

data = pd.merge(
    left=data,
    right=team_info[['team_abbr', 'team_conf', 'team_division']],
    left_on='team',
    right_on='team_abbr',
    how='inner',
)
data = pd.merge(
    left=data,
    right=team_info[['team_abbr', 'team_conf', 'team_division']],
    left_on='opponent',
    right_on='team_abbr',
    how='inner',
).rename(
    columns={
        'team_abbr_y': 'opponent_abbr', 
        'team_conf_y': 'opponent_conf', 
        'team_division_y': 'opponent_division',
        'team_abbr_x': 'team_abbr',
        'team_conf_x': 'team_conf',
        'team_division_x': 'team_division'
    }
)
data.drop(
    ['team_abbr', 'opponent_abbr'],
    axis=1, 
    inplace=True
)

data['division_matchup'] = np.where(
    (data['team_division'] == data['opponent_division']),
    1, 
    0
)

data['conference_matchup'] = np.where(
    (data['team_conf'] == data['opponent_conf']),
    1, 
    0
)

data = pd.merge(
    left=data,
    right=pd.get_dummies(data['team_conf'], prefix='team'),
    left_index=True,
    right_index=True,
    how='inner'
)

data = pd.merge(
    left=data,
    right=pd.get_dummies(data['opponent_conf'], prefix='opponent'),
    left_index=True,
    right_index=True,
    how='inner'
)

data = pd.merge(
    left=data,
    right=pd.get_dummies(data['team_division'], prefix='team'),
    left_index=True,
    right_index=True,
    how='inner'
)

data = pd.merge(
    left=data,
    right=pd.get_dummies(data['opponent_division'], prefix='opponent'),
    left_index=True,
    right_index=True,
    how='inner'
)

data = pd.merge(
    left=data,
    right=pd.get_dummies(data['position']),
    left_index=True,
    right_index=True,
    how='inner'
)

columns_to_drop = [
    'team_conf',
    'team_division',
    'opponent_conf',
    'opponent_division',
    'position',
    'headshot_url',
    
]
data.drop(
    columns_to_drop,
    axis=1,
    inplace=True
)
data.insert(0, 'player_id', data.pop('player_id'))
data.insert(1, 'first_name', data.pop('first_name'))
data.insert(2, 'last_name', data.pop('last_name'))
data.insert(3, 'team', data.pop('team'))
data.insert(4, 'opponent', data.pop('opponent'))

In [143]:
data = pd.merge(
    left=data,
    right=injuries,
    left_on=['season', 'week', 'team'],
    right_on=['season', 'week', 'team'],
    how='left'
)

data['positional_injuries_QB'] = data['positional_injuries_QB'].fillna(0)
data['positional_injuries_RB'] = data['positional_injuries_RB'].fillna(0)
data['positional_injuries_TE'] = data['positional_injuries_TE'].fillna(0)
data['positional_injuries_WR'] = data['positional_injuries_WR'].fillna(0)

data['positional_injuries_QB'] = data['positional_injuries_QB'].astype(int)
data['positional_injuries_RB'] = data['positional_injuries_RB'].astype(int)
data['positional_injuries_TE'] = data['positional_injuries_TE'].astype(int)
data['positional_injuries_WR'] = data['positional_injuries_WR'].astype(int)

In [147]:
data.drop_duplicates(inplace=True)

In [148]:
data.sort_values(by=['season', 'week', 'team'], ascending=True)

Unnamed: 0,player_id,first_name,last_name,team,opponent,season,week,completions,attempts,passing_yards,...,opponent_NFC South,opponent_NFC West,QB,RB,TE,WR,positional_injuries_QB,positional_injuries_RB,positional_injuries_TE,positional_injuries_WR
6770,00-0022921,larry,fitzgerald,ARI,STL,2013,1,0,0,0.0,...,0,1,0,0,0,1,0,1,2,1
61417,00-0030061,zachary,ertz,ARI,WAS,2013,1,0,0,0.0,...,0,0,0,0,1,0,0,1,2,1
88305,00-0027873,jermaine,gresham,ARI,CHI,2013,1,0,0,0.0,...,0,0,0,0,1,0,0,1,2,1
88311,00-0027942,adriel,green,ARI,CHI,2013,1,0,0,0.0,...,0,0,0,0,0,1,0,1,2,1
104917,00-0028112,charles,clay,ARI,CLE,2013,1,0,0,0.0,...,0,0,0,0,1,0,0,1,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80716,00-0036628,john,bates,WAS,NYG,2021,18,0,0,0.0,...,0,0,0,0,1,0,0,0,2,1
80717,00-0036755,jaret,patterson,WAS,NYG,2021,18,0,0,0.0,...,0,0,0,1,0,0,0,0,2,1
86198,00-0029604,kirk,cousins,WAS,CHI,2021,18,14,22,250.0,...,0,0,1,0,0,0,0,0,2,1
142606,00-0033528,jeremy,sprinkle,WAS,PHI,2021,18,0,0,0.0,...,0,0,0,0,1,0,0,0,2,1


In [None]:
 Rushing/Receiving TDs	 6 points
 Rushing/Receiving Yards	 1 point for every 10 yards
 Receptions	 0.5 points
 Passing TDs	 4 points
 Passing Interceptions Thrown	 Negative 1 point
 Fumbles Lost to Opponent	 Negative 2 points
 Passing Yards	 1 point for every 25 yards
 Passing/Rushing/Receiving 2 Point Conversions	 2 points
 Field Goal Made (0-39 yards)	 3 points
 Field Goal Made (40-49 yards)	 4 points
 Field Goal Made (50+ yards)	 5 points
 Field Goal Missed	 Negative 1 point
 Extra Point Made	 1 point
 Extra Point Missed	 Negative 1 point

In [149]:
rushing_td = 6
receiving_td = 6
rushing_yards = 0.1
receiving_yards = 0.1
receptions = 0.5
passing_td = 4
interception = -1
fumble = -2
passing_yards = 1/25
two_pt_conversion = 2

In [155]:
def half_ppr_scoring(
    rushing_tds: int, 
    receiving_tds: int, 
    rushing_yards: int, 
    receiving_yards: int,
    receptions: int,
    passing_tds: int,
    interceptions: int,
    fumbles: int,
    passing_yards: int,
    two_pt_conversions: int
) -> float:
    """
    Calculates a player's fantasy point total in 
    Half-Point-per-Reception scoring.
    
    Arguments:
        rushing_tds (int): Number of rushing touchdowns scored by the player.
        receiving_tds (int): Number of receiving touchdowns scored by the player.
        rushing_yards (int): Amount of rushing yards by the player.
        receiving_yards (int): Amount of receiving yards by the player.
        receptions (int): Number of receptions by the player.
        passing_tds (int): Number of passing touchdowns scored by the player.
        interceptions (int): Number of interceptions thrown by the player.
        fumbles (int): Number of times the player fumbled the ball to the opponent.
        passing_yards (int): Number of passing yards thrown by the player.
        two_pt_conversions (int): Number of two point conversions scored by the player.
        
    Returns:
        float: Fantasy Score in Half-Point-per-Reception scoring.
    
    """
    
    RUSHING_TD = 6
    RECEIVING_TD = 6
    RUSHING_YARDS = 0.1
    RECEIVING_YARDS = 0.1
    RECEPTIONS = 0.5
    PASSING_TD = 4
    INTERCEPTION = -1
    FUMBLE = -2
    PASSING_YARDS = 1/25
    TWO_PT_CONVERSION = 2
    
    half_ppr_score = rushing_tds * RUSHING_TD + \
                     receiving_tds * RECEIVING_TD + \
                     rushing_yards * RUSHING_YARDS + \
                     receiving_yards * RECEIVING_YARDS + \
                     receptions * RECEPTIONS + \
                     passing_tds * PASSING_TD + \
                     interceptions * INTERCEPTION + \
                     fumbles * FUMBLE + \
                     passing_yards * PASSING_YARDS + \
                     two_pt_conversions * TWO_PT_CONVERSION
    
    return half_ppr_score

In [156]:
half_ppr_scoring(1, 1, 1, 1, 1, 1, 1, 1, 1, 1)

15.739999999999998

In [164]:
data['fantasy_half_ppr'] = data.apply(
    lambda x: half_ppr_scoring(
        x.rushing_tds, 
        x['receiving_tds'],
        x['rushing_yards'],
        x['receiving_yards'],
        x['receptions'],
        x['passing_tds'],
        x['interceptions'],
        x['sack_fumbles_lost'] + x['rushing_fumbles_lost'] + x['receiving_fumbles_lost'],
        x['passing_yards'],
        x['passing_2pt_conversions'] + x['rushing_2pt_conversions'] + x['receiving_2pt_conversions']
    ),
    axis=1
)