In [1]:
import pandas as pd
import numpy as np
from scipy.stats import norm, skewnorm
import itertools
from bs4 import BeautifulSoup
import re

In [2]:
cols = ['player', 'position', 'team', 'points', 'floor', 'ceiling']
positions = ['QB', 'RB', 'WR', 'TE']
df = pd.read_csv('projections_2023_wk0.csv')[cols]
df.player = df['player'].str.replace(r'Jr|II|\s*I|[.]', '', regex=True).str.rstrip()
df = df[df.position.isin(positions)]
df['position_rank'] = df.groupby('position').points.transform('rank', ascending=False)
df.set_index(['player', 'position'], inplace=True)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,team,points,floor,ceiling,position_rank
player,position,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Christian McCaffrey,RB,SF,319.0,272.00,369.0,1.0
Austin Ekeler,RB,LAC,311.0,271.00,335.0,2.0
Travis Kelce,TE,KC,286.0,262.00,319.0,1.0
Justin Jefferson,WR,MIN,331.0,286.00,369.0,1.0
Ja'Marr Chase,WR,CIN,316.0,272.00,361.0,2.0
...,...,...,...,...,...,...
Baker Mayfield,QB,TB,170.0,96.30,230.0,32.0
Matt Ryan,QB,FA,70.8,,,33.0
Colt McCoy,QB,ARI,64.9,22.40,102.0,34.0
Kyle Trask,QB,TB,55.4,5.71,91.3,35.0


In [3]:
adp_std_bye = pd.read_csv('adp_std_bye.csv')[['Name', 'Position', 'Std. Dev', 'Bye']]
adp_std_bye.rename(columns={'Name':'player', 'Position': 'position', 'Std. Dev': 'sd_adp', 'Bye': 'bye'}, inplace=True)
adp_std_bye.player = adp_std_bye['player'].str.replace(r'Jr|II|[.]', '', regex=True).str.rstrip()
adp_std_bye = adp_std_bye[adp_std_bye.position.isin(['QB', 'RB', 'WR', 'TE'])]
adp_std_bye.set_index(['player', 'position'], inplace=True)
adp_std_bye.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,sd_adp,bye
player,position,Unnamed: 2_level_1,Unnamed: 3_level_1
Justin Jefferson,WR,0.7,13
Christian McCaffrey,RB,0.9,9
Ja'Marr Chase,WR,0.8,7
Tyreek Hill,WR,1.2,10
Austin Ekeler,RB,1.5,5


In [4]:
adp = pd.read_csv('adp_sources.csv')[['Player', 'POS', 'ESPN', 'Sleeper', 'RTSports']]
adp.dropna(inplace=True)
adp.columns = [column_name.lower() for column_name in adp.columns]
adp.rename(columns={'pos':'position',
                    'sleeper':'adp_sleeper',
                    'espn': 'adp_espn',
                    'rtsports': 'adp_rtsports'}, inplace=True)
adp.position = adp['position'].str.replace(r'\d', '',regex=True).str.rstrip()
adp.player = adp['player'].str.replace(r'Jr|II|[.]', '', regex=True).str.rstrip()
adp.player = np.where(adp.player=='Gabe Davis', 'Gabriel Davis', adp.player)
adp.set_index(['player', 'position'], inplace=True)
adp

Unnamed: 0_level_0,Unnamed: 1_level_0,adp_espn,adp_sleeper,adp_rtsports
player,position,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Justin Jefferson,WR,1.0,1.0,1.0
Christian McCaffrey,RB,3.0,2.0,2.0
Ja'Marr Chase,WR,2.0,3.0,3.0
Austin Ekeler,RB,4.0,4.0,4.0
Travis Kelce,TE,5.0,5.0,8.0
...,...,...,...,...
Hunter Henry,TE,228.0,241.0,207.0
Justyn Ross,WR,212.0,180.0,227.0
Hayden Hurst,TE,231.0,222.0,225.0
Jake Moody,K,236.0,202.0,249.0


In [5]:
merged = df.merge(adp_std_bye, left_index=True, right_index=True, how='left')
merged = merged[(merged.points>100)]
merged = merged.merge(adp, left_index=True, right_index=True, how='left')
merged.reset_index(inplace=True)
merged = merged[~(merged.adp_espn.isna())&(merged.team!='FA')&~(merged.sd_adp.isna())]
merged.head(20)

Unnamed: 0,player,position,team,points,floor,ceiling,position_rank,sd_adp,bye,adp_espn,adp_sleeper,adp_rtsports
0,Christian McCaffrey,RB,SF,319.0,272.0,369.0,1.0,0.9,9.0,3.0,2.0,2.0
1,Austin Ekeler,RB,LAC,311.0,271.0,335.0,2.0,1.5,5.0,4.0,4.0,4.0
2,Travis Kelce,TE,KC,286.0,262.0,319.0,1.0,1.8,10.0,5.0,5.0,8.0
3,Justin Jefferson,WR,MIN,331.0,286.0,369.0,1.0,0.7,13.0,1.0,1.0,1.0
4,Ja'Marr Chase,WR,CIN,316.0,272.0,361.0,2.0,0.8,7.0,2.0,3.0,3.0
5,Cooper Kupp,WR,LAR,312.0,272.0,345.0,3.0,1.8,10.0,7.0,6.0,7.0
6,Saquon Barkley,RB,NYG,275.0,244.0,321.0,3.0,2.2,13.0,9.0,10.0,6.0
7,Tyreek Hill,WR,MIA,306.0,270.0,329.0,4.0,1.2,10.0,6.0,7.0,5.0
8,Tony Pollard,RB,DAL,264.0,227.0,303.0,4.0,2.7,7.0,15.0,21.0,12.0
9,CeeDee Lamb,WR,DAL,291.0,249.0,328.0,5.0,2.3,7.0,16.0,11.0,13.0


In [6]:
def best_params(player_inputs):
    points = player_inputs[1]
    floor = player_inputs[2]
    ceiling = player_inputs[3]

    if (ceiling-points)>(points-floor):
        a_range = np.arange(0,15,0.25)
        loc_range = np.arange(int(floor),int(points), 0.25)
        std = round((ceiling-points)/1.283,1)
        scale_range = np.arange(std-5, std+5, 0.25)
    else:
        a_range = -1*np.arange(0,15,0.25)
        loc_range = np.arange(int(points), int(ceiling), 0.25)
        std = round((points-floor)/1.283,1)
        scale_range = np.arange(std-5, std+5, 0.25)

    arr = np.array(list(set(itertools.product(a_range, loc_range, scale_range))))
    lower, upper = skewnorm.interval(0.90, a=arr[:,0], loc=arr[:,1], scale = arr[:,2])
    mean = skewnorm.mean(a=arr[:,0], loc=arr[:,1], scale = arr[:,2])
    lower_check = abs(lower/floor-1)
    upper_check = abs(upper/ceiling-1)
    mean_check = abs(points/mean-1)
    combined_checks = mean_check+lower_check+upper_check
    best_params = arr[combined_checks.argsort()][0,:]

    params = {'a': best_params[0], 'loc': best_params[1], 'scale':best_params[2]}

    return params

In [7]:
def player_distribution_params(df):
    params_inputs = list(zip(df.player, df.points, df.floor, df.ceiling))
    
    params = {}
    for player_inputs in params_inputs:
        player = player_inputs[0]
        player_params = best_params(player_inputs)
        params[player] = player_params
        
    return params

In [8]:
player_params = player_distribution_params(merged)

In [9]:
merged['params'] = merged.player.map(player_params)
merged

Unnamed: 0,player,position,team,points,floor,ceiling,position_rank,sd_adp,bye,adp_espn,adp_sleeper,adp_rtsports,params
0,Christian McCaffrey,RB,SF,319.0,272.0,369.0,1.0,0.9,9.0,3.0,2.0,2.0,"{'a': 1.0, 'loc': 299.0, 'scale': 35.75}"
1,Austin Ekeler,RB,LAC,311.0,271.0,335.0,2.0,1.5,5.0,4.0,4.0,4.0,"{'a': -6.0, 'loc': 334.75, 'scale': 32.45}"
2,Travis Kelce,TE,KC,286.0,262.0,319.0,1.0,1.8,10.0,5.0,5.0,8.0,"{'a': 4.0, 'loc': 264.25, 'scale': 27.95}"
3,Justin Jefferson,WR,MIN,331.0,286.0,369.0,1.0,0.7,13.0,1.0,1.0,1.0,"{'a': -2.0, 'loc': 356.75, 'scale': 36.1}"
4,Ja'Marr Chase,WR,CIN,316.0,272.0,361.0,2.0,0.8,7.0,2.0,3.0,3.0,"{'a': 0.75, 'loc': 301.25, 'scale': 30.85}"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
167,Jordan Love,QB,GB,226.0,207.0,256.0,21.0,20.8,6.0,177.0,187.0,195.0,"{'a': 6.25, 'loc': 207.0, 'scale': 24.9}"
168,Brock Purdy,QB,SF,225.0,190.0,266.0,22.5,26.2,9.0,164.0,159.0,179.0,"{'a': 1.75, 'loc': 203.0, 'scale': 32.0}"
169,Matthew Stafford,QB,LAR,225.0,185.0,253.0,22.5,18.1,10.0,166.0,208.0,184.0,"{'a': -4.75, 'loc': 251.5, 'scale': 33.95}"
171,Quentin Johnston,WR,LAC,139.0,94.4,170.0,68.0,8.4,5.0,125.0,100.0,133.0,"{'a': -4.75, 'loc': 168.5, 'scale': 37.8}"


In [10]:
merged.to_csv('merged_data.csv', index=False)

In [11]:
def get_all_rows(file:str):
    with open(file) as fp:
        soup = BeautifulSoup(fp, 'html.parser')
        table = soup.find_all("table", {"class": "sip-table"})[0]
        tbody = table.find_all('tbody', recursive=False)
        rows = tbody[0].find_all('tr', recursive=False)
    return rows


In [12]:
def get_player_data(row):
        try:
            player = row.find('span', {'class': 'player-name'}).text.split(',')[0].replace('  ', '')
            injury_probability = row.find('span', {'class': 'prob-injury-per-game'}).text.split('%')[0]
            risk = row.find('span', {'class': 'injury-risk'}).text
            injury_probability = round(float(injury_probability)/100,6)
            return player, injury_probability, risk
        except:
            pass


In [13]:
def injury_risk_df():
    rows = []
    injury_risk = {}
    for position in ['QB', 'RB', 'WR', 'TE']:
        rows.extend(get_all_rows(f'{position.lower()}_injury_data.html'))

    for row in rows:
        try:
            player, injury_probability, risk = get_player_data(row)
            injury_risk[player] = {'p_injury_game':injury_probability, 'injury_risk':risk}
        except:
            pass

    df = pd.DataFrame.from_dict(injury_risk, orient='index').reset_index()
    df.rename(columns={'index': 'player'}, inplace=True)
    df.player = df['player'].str.replace(r'Jr|II|[.]|\s*I', '', regex=True).str.rstrip()
    df.injury_risk = df['injury_risk'].str.replace(r'Very|Risk', '', regex=True).str.rstrip()
    return df

In [14]:
injuries = injury_risk_df()
injuries

Unnamed: 0,player,p_injury_game,injury_risk
0,Josh Allen,0.008,Low
1,Jalen Hurts,0.051,High
2,Patrick Mahomes,0.017,Low
3,Lamar Jackson,0.042,High
4,Justin Herbert,0.063,High
...,...,...,...
340,Hunter Long,0.005,Low
341,Brevin Jordan,0.032,High
342,Jimmy Graham,0.017,Low
343,Devin Asiasi,0.007,Low


In [15]:
merged = merged.merge(injuries, how='left', on='player')
merged = merged[~merged.injury_risk.isna()]

In [19]:
merged.head(10)

Unnamed: 0,player,position,team,points,floor,ceiling,position_rank,sd_adp,bye,adp_espn,adp_sleeper,adp_rtsports,params,p_injury_game,injury_risk
0,Christian McCaffrey,RB,SF,319.0,272.0,369.0,1.0,0.9,9.0,3.0,2.0,2.0,"{'a': 1.0, 'loc': 299.0, 'scale': 35.75}",0.096,High
1,Austin Ekeler,RB,LAC,311.0,271.0,335.0,2.0,1.5,5.0,4.0,4.0,4.0,"{'a': -6.0, 'loc': 334.75, 'scale': 32.45}",0.085,High
2,Travis Kelce,TE,KC,286.0,262.0,319.0,1.0,1.8,10.0,5.0,5.0,8.0,"{'a': 4.0, 'loc': 264.25, 'scale': 27.95}",0.015,Low
3,Justin Jefferson,WR,MIN,331.0,286.0,369.0,1.0,0.7,13.0,1.0,1.0,1.0,"{'a': -2.0, 'loc': 356.75, 'scale': 36.1}",0.03,Medium
4,Ja'Marr Chase,WR,CIN,316.0,272.0,361.0,2.0,0.8,7.0,2.0,3.0,3.0,"{'a': 0.75, 'loc': 301.25, 'scale': 30.85}",0.109,High
5,Cooper Kupp,WR,LAR,312.0,272.0,345.0,3.0,1.8,10.0,7.0,6.0,7.0,"{'a': -2.25, 'loc': 336.0, 'scale': 32.7}",0.048,Medium
6,Saquon Barkley,RB,NYG,275.0,244.0,321.0,3.0,2.2,13.0,9.0,10.0,6.0,"{'a': 6.0, 'loc': 244.25, 'scale': 39.15}",0.055,High
7,Tyreek Hill,WR,MIA,306.0,270.0,329.0,4.0,1.2,10.0,6.0,7.0,5.0,"{'a': -5.5, 'loc': 328.5, 'scale': 29.85}",0.132,High
8,Tony Pollard,RB,DAL,264.0,227.0,303.0,4.0,2.7,7.0,15.0,21.0,12.0,"{'a': 1.0, 'loc': 248.25, 'scale': 27.9}",0.042,Medium
9,CeeDee Lamb,WR,DAL,291.0,249.0,328.0,5.0,2.3,7.0,16.0,11.0,13.0,"{'a': -1.75, 'loc': 314.5, 'scale': 33.45}",0.063,High


In [23]:
merged['flex_rank'] = merged.loc[merged.position.isin(['RB', 'WR']).astype(bool), 'points'].rank(ascending=False)

In [24]:
merged

Unnamed: 0,player,position,team,points,floor,ceiling,position_rank,sd_adp,bye,adp_espn,adp_sleeper,adp_rtsports,params,p_injury_game,injury_risk,flex_rank
0,Christian McCaffrey,RB,SF,319.0,272.0,369.0,1.0,0.9,9.0,3.0,2.0,2.0,"{'a': 1.0, 'loc': 299.0, 'scale': 35.75}",0.096,High,2.0
1,Austin Ekeler,RB,LAC,311.0,271.0,335.0,2.0,1.5,5.0,4.0,4.0,4.0,"{'a': -6.0, 'loc': 334.75, 'scale': 32.45}",0.085,High,5.0
2,Travis Kelce,TE,KC,286.0,262.0,319.0,1.0,1.8,10.0,5.0,5.0,8.0,"{'a': 4.0, 'loc': 264.25, 'scale': 27.95}",0.015,Low,
3,Justin Jefferson,WR,MIN,331.0,286.0,369.0,1.0,0.7,13.0,1.0,1.0,1.0,"{'a': -2.0, 'loc': 356.75, 'scale': 36.1}",0.030,Medium,1.0
4,Ja'Marr Chase,WR,CIN,316.0,272.0,361.0,2.0,0.8,7.0,2.0,3.0,3.0,"{'a': 0.75, 'loc': 301.25, 'scale': 30.85}",0.109,High,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,Jordan Love,QB,GB,226.0,207.0,256.0,21.0,20.8,6.0,177.0,187.0,195.0,"{'a': 6.25, 'loc': 207.0, 'scale': 24.9}",0.015,Low,
146,Brock Purdy,QB,SF,225.0,190.0,266.0,22.5,26.2,9.0,164.0,159.0,179.0,"{'a': 1.75, 'loc': 203.0, 'scale': 32.0}",0.012,Low,
147,Matthew Stafford,QB,LAR,225.0,185.0,253.0,22.5,18.1,10.0,166.0,208.0,184.0,"{'a': -4.75, 'loc': 251.5, 'scale': 33.95}",0.050,High,
148,Quentin Johnston,WR,LAC,139.0,94.4,170.0,68.0,8.4,5.0,125.0,100.0,133.0,"{'a': -4.75, 'loc': 168.5, 'scale': 37.8}",0.039,Medium,97.0


In [25]:
merged.to_csv('merged_data.csv', index=False)