## Imports
---

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

## Read in all of the position versus Defense data sets and clean for modeling
---

In [3]:
# read in data sets
QB_def = pd.read_csv('../data/DEF_QB.csv', index_col=[0])
RB_def = pd.read_csv('../data/DEF_RB.csv', index_col=[0])
WR_def = pd.read_csv('../data/DEF_WR.csv', index_col=[0])
TE_def = pd.read_csv('../data/DEF_TE.csv', index_col=[0])

In [20]:
# looks good!
QB_def.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 32 entries, 0 to 31
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   team      32 non-null     object 
 1   games     32 non-null     float64
 2   pass_cmp  32 non-null     float64
 3   pass_att  32 non-null     float64
 4   pass_yds  32 non-null     float64
 5   pass_td   32 non-null     float64
 6   pass_int  32 non-null     float64
 7   two_pt    32 non-null     float64
 8   sacks     32 non-null     float64
 9   rush_att  32 non-null     float64
 10  rush_yds  32 non-null     float64
 11  rush_td   32 non-null     float64
 12  DK_pt     32 non-null     float64
 13  DK_ptg    32 non-null     float64
dtypes: float64(13), object(1)
memory usage: 3.8+ KB


In [10]:
QB_def.isnull().sum()

team        0
games       0
pass_cmp    0
pass_att    0
pass_yds    0
pass_td     0
pass_int    0
two_pt      0
sacks       0
rush_att    0
rush_yds    0
rush_td     0
DK_pt       0
DK_ptg      0
dtype: int64

In [11]:
# looks good!
RB_def.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 32 entries, 0 to 31
Data columns (total 11 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   team      32 non-null     object 
 1   games     32 non-null     float64
 2   rush_att  32 non-null     float64
 3   rush_yds  32 non-null     float64
 4   rush_td   32 non-null     float64
 5   rec_tgt   32 non-null     float64
 6   rec       32 non-null     float64
 7   rec_yds   32 non-null     float64
 8   rec_td    32 non-null     float64
 9   DK_pt     32 non-null     float64
 10  DK_ptg    32 non-null     float64
dtypes: float64(10), object(1)
memory usage: 3.0+ KB


In [12]:
RB_def.isnull().sum()

team        0
games       0
rush_att    0
rush_yds    0
rush_td     0
rec_tgt     0
rec         0
rec_yds     0
rec_td      0
DK_pt       0
DK_ptg      0
dtype: int64

In [13]:
# looks good!
WR_def.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 32 entries, 0 to 31
Data columns (total 8 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   team     32 non-null     object 
 1   games    32 non-null     float64
 2   rec_tgt  32 non-null     float64
 3   rec      32 non-null     float64
 4   rec_yds  32 non-null     float64
 5   rec_td   32 non-null     float64
 6   DK_pt    32 non-null     float64
 7   DK_ptg   32 non-null     float64
dtypes: float64(7), object(1)
memory usage: 2.2+ KB


In [14]:
WR_def.isnull().sum()

team       0
games      0
rec_tgt    0
rec        0
rec_yds    0
rec_td     0
DK_pt      0
DK_ptg     0
dtype: int64

In [15]:
# looks good!
TE_def.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 32 entries, 0 to 31
Data columns (total 8 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   team     32 non-null     object 
 1   games    32 non-null     float64
 2   rec_tgt  32 non-null     float64
 3   rec      32 non-null     float64
 4   rec_yds  32 non-null     float64
 5   rec_td   32 non-null     float64
 6   DK_pt    32 non-null     float64
 7   DK_ptg   32 non-null     float64
dtypes: float64(7), object(1)
memory usage: 2.2+ KB


In [16]:
TE_def.isnull().sum()

team       0
games      0
rec_tgt    0
rec        0
rec_yds    0
rec_td     0
DK_pt      0
DK_ptg     0
dtype: int64

##### No need to resave these CSV files
---

## Read in all of the Red Zone by position data sets and clean for modeling
---

In [84]:
# read in data sets
QB_rz = pd.read_csv('../data/pass_rz.csv')
RB_rz = pd.read_csv('../data/rush_rz.csv')
WR_rz = pd.read_csv('../data/rec_rz.csv')

In [61]:
QB_rz.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 57 entries, 0 to 56
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   player           56 non-null     object 
 1   team             56 non-null     object 
 2   pass_cmp_20      56 non-null     float64
 3   pass_att_20      56 non-null     float64
 4   pass_cmp_pct_20  56 non-null     float64
 5   pass_yds_20      56 non-null     float64
 6   pass_td_20       56 non-null     float64
 7   pass_int_20      56 non-null     float64
 8   pass_cmp_10      56 non-null     float64
 9   pass_att_10      56 non-null     float64
 10  pass_cmp_pct_10  50 non-null     float64
 11  pass_yds_10      56 non-null     float64
 12  pass_td_10       56 non-null     float64
 13  pass_int_10      56 non-null     float64
dtypes: float64(12), object(2)
memory usage: 6.4+ KB


In [62]:
QB_rz.isnull().sum()

player             1
team               1
pass_cmp_20        1
pass_att_20        1
pass_cmp_pct_20    1
pass_yds_20        1
pass_td_20         1
pass_int_20        1
pass_cmp_10        1
pass_att_10        1
pass_cmp_pct_10    7
pass_yds_10        1
pass_td_10         1
pass_int_10        1
dtype: int64

In [63]:
QB_rz.fillna(0, inplace=True)

In [64]:
QB_rz.isnull().sum()

player             0
team               0
pass_cmp_20        0
pass_att_20        0
pass_cmp_pct_20    0
pass_yds_20        0
pass_td_20         0
pass_int_20        0
pass_cmp_10        0
pass_att_10        0
pass_cmp_pct_10    0
pass_yds_10        0
pass_td_10         0
pass_int_10        0
dtype: int64

In [65]:
# drop last row... not sure why this happened
QB_rz.drop([56], inplace=True)

---

In [66]:
RB_rz.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 169 entries, 0 to 168
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   player            168 non-null    object 
 1   team              168 non-null    object 
 2   rush_att_20       168 non-null    float64
 3   rush_yds_20       168 non-null    float64
 4   rush_td_20        168 non-null    float64
 5   team_rush_pct_20  167 non-null    object 
 6   rush_att_10       168 non-null    float64
 7   rush_yds_10       168 non-null    float64
 8   rush_td_10        168 non-null    float64
 9   team_rush_pct_10  167 non-null    object 
 10  rush_att_5        168 non-null    float64
 11  rush_yds_5        168 non-null    float64
 12  rush_td_5         168 non-null    float64
 13  team_rush_pct_5   167 non-null    object 
dtypes: float64(9), object(5)
memory usage: 18.6+ KB


In [67]:
# pulled this from SO, although I think we covered this in a breakfast hour talk?
# perform to the 3 relevant columns
RB_rz['team_rush_pct_20'] = RB_rz['team_rush_pct_20'].str.rstrip('%').astype(float) / 100
RB_rz['team_rush_pct_10'] = RB_rz['team_rush_pct_10'].str.rstrip('%').astype(float) / 100
RB_rz['team_rush_pct_5'] = RB_rz['team_rush_pct_5'].str.rstrip('%').astype(float) / 100

In [70]:
RB_rz

Unnamed: 0,player,team,rush_att_20,rush_yds_20,rush_td_20,team_rush_pct_20,rush_att_10,rush_yds_10,rush_td_10,team_rush_pct_10,rush_att_5,rush_yds_5,rush_td_5,team_rush_pct_5
0,Jonathan Taylor,IND,44.0,119.0,5.0,0.733,23.0,16.0,4.0,0.697,15.0,7.0,3.0,0.789
1,Derrick Henry,TEN,32.0,97.0,8.0,0.711,17.0,47.0,7.0,0.630,8.0,5.0,4.0,0.533
2,Aaron Jones,GNB,28.0,78.0,3.0,0.667,13.0,26.0,3.0,0.650,5.0,6.0,3.0,0.833
3,Damien Harris,NWE,27.0,69.0,6.0,0.563,20.0,42.0,6.0,0.667,10.0,13.0,6.0,0.667
4,Nick Chubb,CLE,26.0,93.0,3.0,0.400,13.0,29.0,2.0,0.419,7.0,10.0,2.0,0.500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
164,Kadarius Toney,NYG,1.0,7.0,0.0,0.027,1.0,7.0,0.0,0.053,0.0,0.0,0.0,0.000
165,Mitchell Trubisky,BUF,1.0,4.0,1.0,0.020,1.0,4.0,1.0,0.036,1.0,4.0,1.0,0.100
166,Russell Wilson,SEA,1.0,16.0,1.0,0.042,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.000
167,Zach Wilson,NYJ,1.0,-3.0,0.0,0.042,1.0,-3.0,0.0,0.063,1.0,-3.0,0.0,0.111


In [71]:
RB_rz.isnull().sum()

player              1
team                1
rush_att_20         1
rush_yds_20         1
rush_td_20          1
team_rush_pct_20    2
rush_att_10         1
rush_yds_10         1
rush_td_10          1
team_rush_pct_10    2
rush_att_5          1
rush_yds_5          1
rush_td_5           1
team_rush_pct_5     2
dtype: int64

In [72]:
RB_rz.fillna(0, inplace=True)

In [73]:
RB_rz.isnull().sum()

player              0
team                0
rush_att_20         0
rush_yds_20         0
rush_td_20          0
team_rush_pct_20    0
rush_att_10         0
rush_yds_10         0
rush_td_10          0
team_rush_pct_10    0
rush_att_5          0
rush_yds_5          0
rush_td_5           0
team_rush_pct_5     0
dtype: int64

In [74]:
# drop last row... not sure why this happened
RB_rz.drop([168], inplace=True)

---

In [85]:
WR_rz.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 311 entries, 0 to 310
Data columns (total 14 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   player               310 non-null    object 
 1   team                 310 non-null    object 
 2   rec_tgt_20           310 non-null    float64
 3   rec_20               310 non-null    float64
 4   rec_pct_20           310 non-null    object 
 5   rec_yds_20           310 non-null    float64
 6   rec_td_20            310 non-null    float64
 7   team_rec_tgt_pct_20  308 non-null    object 
 8   rec_tgt_10           310 non-null    float64
 9   rec_10               310 non-null    float64
 10  rec_pct_10           229 non-null    object 
 11  rec_yds_10           310 non-null    float64
 12  rec_td_10            310 non-null    float64
 13  team_rec_tgt_pct_10  308 non-null    object 
dtypes: float64(8), object(6)
memory usage: 34.1+ KB


In [86]:
# pulled this from SO, although I think we covered this in a breakfast hour talk?
# perform to the 4 relevant columns
WR_rz['team_rec_tgt_pct_20'] = WR_rz['team_rec_tgt_pct_20'].str.rstrip('%').astype(float) / 100
WR_rz['team_rec_tgt_pct_10'] = WR_rz['team_rec_tgt_pct_10'].str.rstrip('%').astype(float) / 100
WR_rz['rec_pct_20'] = WR_rz['rec_pct_20'].str.rstrip('%').astype(float) / 100
WR_rz['rec_pct_10'] = WR_rz['rec_pct_10'].str.rstrip('%').astype(float) / 100

In [87]:
WR_rz

Unnamed: 0,player,team,rec_tgt_20,rec_20,rec_pct_20,rec_yds_20,rec_td_20,team_rec_tgt_pct_20,rec_tgt_10,rec_10,rec_pct_10,rec_yds_10,rec_td_10,team_rec_tgt_pct_10
0,Cooper Kupp,LAR,21.0,14.0,0.6667,128.0,9.0,0.313,10.0,6.0,0.6000,32.0,6.0,0.323
1,Robert Woods,LAR,16.0,9.0,0.5625,66.0,4.0,0.239,7.0,3.0,0.4286,9.0,2.0,0.226
2,Chris Godwin,TAM,15.0,12.0,0.8000,80.0,4.0,0.250,6.0,4.0,0.6667,19.0,2.0,0.194
3,Keenan Allen,LAC,13.0,8.0,0.6154,43.0,2.0,0.333,6.0,4.0,0.6667,17.0,2.0,0.286
4,Stefon Diggs,BUF,13.0,9.0,0.6923,62.0,3.0,0.255,4.0,3.0,0.7500,11.0,1.0,0.182
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
306,Preston Williams,MIA,1.0,0.0,0.0000,0.0,0.0,0.031,1.0,0.0,0.0000,0.0,0.0,0.053
307,Tyrell Williams,DET,1.0,1.0,1.0000,7.0,0.0,0.034,0.0,0.0,,0.0,0.0,0.000
308,Albert Wilson,MIA,1.0,0.0,0.0000,0.0,0.0,0.031,0.0,0.0,,0.0,0.0,0.000
309,Brandon Zylstra,CAR,1.0,1.0,1.0000,8.0,0.0,0.037,0.0,0.0,,0.0,0.0,0.000


In [88]:
WR_rz.isnull().sum()

player                  1
team                    1
rec_tgt_20              1
rec_20                  1
rec_pct_20              1
rec_yds_20              1
rec_td_20               1
team_rec_tgt_pct_20     3
rec_tgt_10              1
rec_10                  1
rec_pct_10             82
rec_yds_10              1
rec_td_10               1
team_rec_tgt_pct_10     3
dtype: int64

In [89]:
WR_rz.fillna(0, inplace=True)

In [90]:
WR_rz.isnull().sum()

player                 0
team                   0
rec_tgt_20             0
rec_20                 0
rec_pct_20             0
rec_yds_20             0
rec_td_20              0
team_rec_tgt_pct_20    0
rec_tgt_10             0
rec_10                 0
rec_pct_10             0
rec_yds_10             0
rec_td_10              0
team_rec_tgt_pct_10    0
dtype: int64

In [91]:
# drop last row... not sure why this happened
WR_rz.drop([310], inplace=True)

In [93]:
# resave these three dataframes
QB_rz.to_csv('../data/pass_rz.csv')
RB_rz.to_csv('../data/rush_rz.csv')
WR_rz.to_csv('../data/rec_rz.csv')

## Read in all of the team metrics data sets and clean for modeling
---

In [127]:
# read in data sets
conv = pd.read_csv('../data/team_conv.csv')
drive = pd.read_csv('../data/team_drive.csv')
off = pd.read_csv('../data/team_off.csv')
passing = pd.read_csv('../data/team_pass.csv')
rushing = pd.read_csv('../data/team_rush.csv')
score = pd.read_csv('../data/team_score.csv')

In [128]:
conv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32 entries, 0 to 31
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   team         32 non-null     object
 1   games        32 non-null     int64 
 2   third_att    32 non-null     int64 
 3   third_conv   32 non-null     int64 
 4   third_pct    32 non-null     object
 5   fourth_att   32 non-null     int64 
 6   fourth_conv  32 non-null     int64 
 7   fourth_pct   32 non-null     object
 8   rz_att       32 non-null     int64 
 9   rz_td        32 non-null     int64 
 10  rz_pct       32 non-null     object
dtypes: int64(7), object(4)
memory usage: 2.9+ KB


In [129]:
conv.head(1)

Unnamed: 0,team,games,third_att,third_conv,third_pct,fourth_att,fourth_conv,fourth_pct,rz_att,rz_td,rz_pct
0,Kansas City Chiefs,9,105,55,52.40%,8,4,50.00%,35,20,57.10%


In [130]:
# pulled this from SO, although I think we covered this in a breakfast hour talk?
# perform to the 3 relevant columns
conv['third_pct'] = conv['third_pct'].str.rstrip('%').astype(float) / 100
conv['fourth_pct'] = conv['fourth_pct'].str.rstrip('%').astype(float) / 100
conv['rz_pct'] = conv['rz_pct'].str.rstrip('%').astype(float) / 100

In [131]:
conv

Unnamed: 0,team,games,third_att,third_conv,third_pct,fourth_att,fourth_conv,fourth_pct,rz_att,rz_td,rz_pct
0,Kansas City Chiefs,9,105,55,0.524,8,4,0.5,35,20,0.571
1,New England Patriots,9,121,54,0.446,6,4,0.667,31,17,0.548
2,Buffalo Bills,8,110,53,0.482,10,3,0.3,34,19,0.559
3,Miami Dolphins,9,123,52,0.423,13,7,0.538,24,15,0.625
4,Tennessee Titans,9,121,51,0.421,14,10,0.714,35,23,0.657
5,Tampa Bay Buccaneers,8,104,51,0.49,8,3,0.375,37,25,0.676
6,Houston Texans,9,126,49,0.389,11,6,0.545,19,10,0.526
7,Philadelphia Eagles,9,107,47,0.439,12,4,0.333,32,23,0.719
8,Atlanta Falcons,8,108,47,0.435,10,4,0.4,27,17,0.63
9,Dallas Cowboys,8,102,47,0.461,14,5,0.357,29,17,0.586


In [132]:
conv.isnull().sum()

team           0
games          0
third_att      0
third_conv     0
third_pct      0
fourth_att     0
fourth_conv    0
fourth_pct     0
rz_att         0
rz_td          0
rz_pct         0
dtype: int64

---

In [133]:
drive.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32 entries, 0 to 31
Data columns (total 11 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   team       32 non-null     object 
 1   games      32 non-null     int64  
 2   drives     32 non-null     int64  
 3   plays      32 non-null     int64  
 4   score_pct  32 non-null     float64
 5   to_pct     32 non-null     float64
 6   plays_avg  32 non-null     float64
 7   yds_avg    32 non-null     float64
 8   start_avg  32 non-null     object 
 9   time_avg   32 non-null     object 
 10  pts_avg    32 non-null     float64
dtypes: float64(5), int64(3), object(3)
memory usage: 2.9+ KB


In [134]:
drive.head(1)

Unnamed: 0,team,games,drives,plays,score_pct,to_pct,plays_avg,yds_avg,start_avg,time_avg,pts_avg
0,Carolina Panthers,9,107,622,30.8,14.0,5.8,26.8,Own 28.6,2:42,1.52


In [136]:
# not sure if we will use 'start_avg' or 'time_avg' just yet so let's hold off

In [137]:
drive.isnull().sum()

team         0
games        0
drives       0
plays        0
score_pct    0
to_pct       0
plays_avg    0
yds_avg      0
start_avg    0
time_avg     0
pts_avg      0
dtype: int64

---

In [138]:
off.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32 entries, 0 to 31
Data columns (total 27 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   team         32 non-null     object 
 1   games        32 non-null     int64  
 2   pt_for       32 non-null     int64  
 3   tot_yds      32 non-null     int64  
 4   tot_ply      32 non-null     int64  
 5   tot_ydp      32 non-null     float64
 6   tot_to       32 non-null     int64  
 7   tot_fumb     32 non-null     int64  
 8   firstd       32 non-null     int64  
 9   pass_cmp     32 non-null     int64  
 10  pass_att     32 non-null     int64  
 11  pass_yds     32 non-null     int64  
 12  pass_td      32 non-null     int64  
 13  pass_int     32 non-null     int64  
 14  pass_netyda  32 non-null     float64
 15  pass_firstd  32 non-null     int64  
 16  rush_att     32 non-null     int64  
 17  rush_yds     32 non-null     int64  
 18  rush_td      32 non-null     int64  
 19  rush_yda  

In [139]:
off.head(1)

Unnamed: 0,team,games,pt_for,tot_yds,tot_ply,tot_ydp,tot_to,tot_fumb,firstd,pass_cmp,...,rush_yds,rush_td,rush_yda,rush_firstd,pen,pen_yds,pen_firstd,score_pct,to_pct,tot_expt
0,Arizona Cardinals,9,277,3586,587,6.1,9,2,205,209,...,1193,15,4.2,74,61,524,17,47.9,8.3,115.13


In [141]:
off.isnull().sum()

team           0
games          0
pt_for         0
tot_yds        0
tot_ply        0
tot_ydp        0
tot_to         0
tot_fumb       0
firstd         0
pass_cmp       0
pass_att       0
pass_yds       0
pass_td        0
pass_int       0
pass_netyda    0
pass_firstd    0
rush_att       0
rush_yds       0
rush_td        0
rush_yda       0
rush_firstd    0
pen            0
pen_yds        0
pen_firstd     0
score_pct      0
to_pct         0
tot_expt       0
dtype: int64

---

In [142]:
passing.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32 entries, 0 to 31
Data columns (total 24 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   team             32 non-null     object 
 1   games            32 non-null     int64  
 2   pass_cmp         32 non-null     int64  
 3   pass_att         32 non-null     int64  
 4   pass_pct         32 non-null     float64
 5   pass_yds         32 non-null     int64  
 6   pass_td          32 non-null     int64  
 7   pass_td_pct      32 non-null     float64
 8   pass_int         32 non-null     int64  
 9   pass_int_pct     32 non-null     float64
 10  pass_lng         32 non-null     int64  
 11  pass_yda         32 non-null     float64
 12  pass_ayda        32 non-null     float64
 13  pass_ydc         32 non-null     float64
 14  pass_ydg         32 non-null     float64
 15  qb_rtg           32 non-null     float64
 16  qb_sk            32 non-null     int64  
 17  qb_sack_yds      3

In [144]:
passing.head(1)

Unnamed: 0,team,games,pass_cmp,pass_att,pass_pct,pass_yds,pass_td,pass_td_pct,pass_int,pass_int_pct,...,pass_ydg,qb_rtg,qb_sk,qb_sack_yds,pass_netyda,pass_adj_netyda,qb_sk_pct,qb_cmbk,qb_gwd,pass_expt
0,Los Angeles Rams,9,220,325,67.7,2667,23,7.1,7,2.2,...,296.3,108.7,13,109,7.9,8.3,3.8,2.0,2.0,136.05


In [145]:
passing.isnull().sum()

team               0
games              0
pass_cmp           0
pass_att           0
pass_pct           0
pass_yds           0
pass_td            0
pass_td_pct        0
pass_int           0
pass_int_pct       0
pass_lng           0
pass_yda           0
pass_ayda          0
pass_ydc           0
pass_ydg           0
qb_rtg             0
qb_sk              0
qb_sack_yds        0
pass_netyda        0
pass_adj_netyda    0
qb_sk_pct          0
qb_cmbk            9
qb_gwd             9
pass_expt          0
dtype: int64

In [146]:
passing.fillna(0, inplace=True)

---

In [149]:
rushing.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32 entries, 0 to 31
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   team       32 non-null     object 
 1   games      32 non-null     int64  
 2   rush_att   32 non-null     int64  
 3   rush_yds   32 non-null     int64  
 4   rush_td    32 non-null     int64  
 5   rush_lng   32 non-null     int64  
 6   rush_yda   32 non-null     float64
 7   rush_ydg   32 non-null     float64
 8   fumb       32 non-null     int64  
 9   rush_expt  32 non-null     float64
dtypes: float64(3), int64(6), object(1)
memory usage: 2.6+ KB


In [150]:
rushing.head(1)

Unnamed: 0,team,games,rush_att,rush_yds,rush_td,rush_lng,rush_yda,rush_ydg,fumb,rush_expt
0,Cleveland Browns,9,273,1442,16,70,5.3,160.2,11,39.33


In [152]:
rushing.isnull().sum()

team         0
games        0
rush_att     0
rush_yds     0
rush_td      0
rush_lng     0
rush_yda     0
rush_ydg     0
fumb         0
rush_expt    0
dtype: int64

---

In [153]:
score.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32 entries, 0 to 31
Data columns (total 20 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   team        32 non-null     object 
 1   games       32 non-null     int64  
 2   rush_td     32 non-null     int64  
 3   rec_td      32 non-null     int64  
 4   pr_td       0 non-null      float64
 5   kr_td       4 non-null      float64
 6   fumb_td     5 non-null      float64
 7   int_td      14 non-null     float64
 8   oth_td      3 non-null      float64
 9   all_td      32 non-null     int64  
 10  two_pt      20 non-null     float64
 11  two_att     32 non-null     int64  
 12  def_two_pt  0 non-null      float64
 13  xp_made     32 non-null     int64  
 14  xp_att      32 non-null     int64  
 15  fg_made     32 non-null     int64  
 16  fg_att      32 non-null     int64  
 17  safety      4 non-null      float64
 18  pts         32 non-null     int64  
 19  pts_g       32 non-null     flo

In [154]:
score.head(1)

Unnamed: 0,team,games,rush_td,rec_td,pr_td,kr_td,fumb_td,int_td,oth_td,all_td,two_pt,two_att,def_two_pt,xp_made,xp_att,fg_made,fg_att,safety,pts,pts_g
0,Buffalo Bills,8,8,17,,,,1.0,,26,1.0,3,,23,23,18,19,,235,29.4


In [155]:
score.isnull().sum()

team           0
games          0
rush_td        0
rec_td         0
pr_td         32
kr_td         28
fumb_td       27
int_td        18
oth_td        29
all_td         0
two_pt        12
two_att        0
def_two_pt    32
xp_made        0
xp_att         0
fg_made        0
fg_att         0
safety        28
pts            0
pts_g          0
dtype: int64

In [156]:
score.fillna(0, inplace=True)

---

In [157]:
# resave these three dataframes
conv.to_csv('../data/team_conv.csv')
drive.to_csv('../data/team_drive.csv')
off.to_csv('../data/team_off.csv')
passing.to_csv('../data/team_pass.csv')
rushing.to_csv('../data/team_rush.csv')
score.to_csv('../data/team_score.csv')

## Read in all of the week 10 matchups by position data sets and clean for modeling
---

In [368]:
# read in data sets
QB10 = pd.read_csv('../data/week_10_QB.csv')
RB10 = pd.read_csv('../data/week_10_RB.csv')
TE10 = pd.read_csv('../data/week_10_TE.csv')
WR10 = pd.read_csv('../data/week_10_WR.csv')

In [369]:
QB10.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 46 entries, 0 to 45
Data columns (total 18 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   player         46 non-null     object 
 1   team           46 non-null     object 
 2   games          46 non-null     int64  
 3   pass_cmp       46 non-null     float64
 4   pass_att       46 non-null     float64
 5   pass_yds       46 non-null     float64
 6   pass_td        46 non-null     float64
 7   pass_int       46 non-null     float64
 8   sk             46 non-null     float64
 9   rush_att       46 non-null     float64
 10  rush_yds       46 non-null     float64
 11  rush_td        46 non-null     float64
 12  DK_pt_avg      44 non-null     float64
 13  opp_home       25 non-null     object 
 14  opp            46 non-null     object 
 15  opp_rk         46 non-null     int64  
 16  opp_DK_pt_avg  46 non-null     float64
 17  DK_rk          46 non-null     int64  
dtypes: float64(1

In [370]:
QB10.head(1)

Unnamed: 0,player,team,games,pass_cmp,pass_att,pass_yds,pass_td,pass_int,sk,rush_att,rush_yds,rush_td,DK_pt_avg,opp_home,opp,opp_rk,opp_DK_pt_avg,DK_rk
0,Tom Brady,TAM,8,28.9,42.9,331.3,3.1,0.6,1.5,1.8,4.9,0.1,27.6,@,WAS,32,26.8,1


In [371]:
QB10.isnull().sum()

player            0
team              0
games             0
pass_cmp          0
pass_att          0
pass_yds          0
pass_td           0
pass_int          0
sk                0
rush_att          0
rush_yds          0
rush_td           0
DK_pt_avg         2
opp_home         21
opp               0
opp_rk            0
opp_DK_pt_avg     0
DK_rk             0
dtype: int64

In [372]:
# for opponent location change '@' to a 1, then all NaN to 0.
QB10['opp_home'].replace('@', 1, inplace=True)
QB10.fillna(0, inplace=True)
QB10['opp_home'] = QB10['opp_home'].astype(int)

In [373]:
QB10.head(10)

Unnamed: 0,player,team,games,pass_cmp,pass_att,pass_yds,pass_td,pass_int,sk,rush_att,rush_yds,rush_td,DK_pt_avg,opp_home,opp,opp_rk,opp_DK_pt_avg,DK_rk
0,Tom Brady,TAM,8,28.9,42.9,331.3,3.1,0.6,1.5,1.8,4.9,0.1,27.6,1,WAS,32,26.8,1
1,Lamar Jackson,BAL,8,21.6,33.3,276.1,1.6,0.9,3.0,12.1,75.0,0.3,27.3,1,MIA,27,22.1,2
2,Josh Allen,BUF,8,26.1,39.9,279.5,2.1,0.6,1.5,7.1,39.9,0.4,26.4,1,NYJ,17,19.5,4
3,Matthew Stafford,LAR,9,24.3,35.7,307.9,2.6,0.7,1.3,2.4,4.1,0.0,24.2,1,SFO,29,22.1,3
4,Dak Prescott,DAL,7,25.3,36.4,292.1,2.6,0.7,1.4,3.4,12.3,0.0,23.6,0,ATL,29,21.4,6
5,Justin Herbert,LAC,8,26.4,39.9,293.8,2.3,0.8,1.8,3.5,11.9,0.3,24.8,0,MIN,16,20.7,5
6,Derek Carr,LVR,8,26.3,39.0,320.6,1.6,0.9,2.3,2.8,4.3,0.0,20.6,0,KAN,31,24.0,7
7,Kyler Murray,ARI,8,23.3,32.0,284.5,2.1,0.9,2.3,6.1,18.4,0.4,23.8,0,CAR,6,17.5,9
8,Carson Wentz,IND,9,21.1,33.3,244.2,1.9,0.3,1.9,3.2,13.4,0.1,19.2,0,JAX,23,21.4,11
9,Matt Ryan,ATL,8,26.1,37.6,269.6,1.9,0.8,1.8,2.1,4.6,0.1,20.2,1,DAL,26,22.6,8


---

In [330]:
RB10.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 102 entries, 0 to 101
Data columns (total 17 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   player          102 non-null    object 
 1   team            102 non-null    object 
 2   games           102 non-null    int64  
 3   team_snaps_pct  102 non-null    object 
 4   rush_att_avg    102 non-null    float64
 5   rush_yds_avg    102 non-null    float64
 6   rush_td_avg     102 non-null    float64
 7   rec_tgt_avg     102 non-null    float64
 8   rec_avg         102 non-null    float64
 9   rec_yds_avg     102 non-null    float64
 10  rec_td_avg      102 non-null    float64
 11  DK_pt_avg       101 non-null    float64
 12  opp_home        53 non-null     object 
 13  opp             102 non-null    object 
 14  opp_rk          102 non-null    int64  
 15  opp_DK_pt_avg   102 non-null    float64
 16  DK_rk           102 non-null    int64  
dtypes: float64(9), int64(3), object(5)


In [331]:
RB10.head(1)

Unnamed: 0,player,team,games,team_snaps_pct,rush_att_avg,rush_yds_avg,rush_td_avg,rec_tgt_avg,rec_avg,rec_yds_avg,rec_td_avg,DK_pt_avg,opp_home,opp,opp_rk,opp_DK_pt_avg,DK_rk
0,Najee Harris,PIT,8,464 (85.45%),18.8,67.6,0.5,6.5,5.0,36.1,0.3,20.6,,DET,31,31.2,1


In [332]:
# I may want to consider snap count for RBs, although this may just be a fun exercise
# pulled this from SO
# first get rid of the special characters '(','%',')', then get rid of the leading numbers, then convert to float and divide by 100
RB10['team_snaps_pct'] = RB10['team_snaps_pct'].str.replace('[(,%,)]', '') 
RB10['team_snaps_pct'] = RB10['team_snaps_pct'].str.split().str[1]
RB10['team_snaps_pct'] = RB10['team_snaps_pct'].astype(float) / 100

  RB10['team_snaps_pct'] = RB10['team_snaps_pct'].str.replace('[(,%,)]', '')


In [333]:
RB10.head()

Unnamed: 0,player,team,games,team_snaps_pct,rush_att_avg,rush_yds_avg,rush_td_avg,rec_tgt_avg,rec_avg,rec_yds_avg,rec_td_avg,DK_pt_avg,opp_home,opp,opp_rk,opp_DK_pt_avg,DK_rk
0,Najee Harris,PIT,8,0.8545,18.8,67.6,0.5,6.5,5.0,36.1,0.3,20.6,,DET,31,31.2,1
1,Jonathan Taylor,IND,9,0.5864,15.6,91.2,0.9,3.0,2.6,32.6,0.1,22.5,,JAX,17,23.3,2
2,Austin Ekeler,LAC,8,0.6582,12.6,59.9,0.6,5.6,4.5,40.6,0.4,21.1,,MIN,20,25.9,3
3,Ezekiel Elliott,DAL,8,0.7026,16.0,77.8,0.6,3.6,2.9,19.1,0.1,18.1,,ATL,25,27.6,5
4,Alvin Kamara,NOR,8,0.7838,18.3,66.3,0.4,5.5,4.0,38.8,0.5,20.5,@,TEN,8,21.3,4


In [334]:
RB10.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 102 entries, 0 to 101
Data columns (total 17 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   player          102 non-null    object 
 1   team            102 non-null    object 
 2   games           102 non-null    int64  
 3   team_snaps_pct  102 non-null    float64
 4   rush_att_avg    102 non-null    float64
 5   rush_yds_avg    102 non-null    float64
 6   rush_td_avg     102 non-null    float64
 7   rec_tgt_avg     102 non-null    float64
 8   rec_avg         102 non-null    float64
 9   rec_yds_avg     102 non-null    float64
 10  rec_td_avg      102 non-null    float64
 11  DK_pt_avg       101 non-null    float64
 12  opp_home        53 non-null     object 
 13  opp             102 non-null    object 
 14  opp_rk          102 non-null    int64  
 15  opp_DK_pt_avg   102 non-null    float64
 16  DK_rk           102 non-null    int64  
dtypes: float64(10), int64(3), object(4)

In [335]:
RB10.isnull().sum()

player             0
team               0
games              0
team_snaps_pct     0
rush_att_avg       0
rush_yds_avg       0
rush_td_avg        0
rec_tgt_avg        0
rec_avg            0
rec_yds_avg        0
rec_td_avg         0
DK_pt_avg          1
opp_home          49
opp                0
opp_rk             0
opp_DK_pt_avg      0
DK_rk              0
dtype: int64

In [336]:
# for opponent location change '@' to a 1, then all NaN to 0.
RB10['opp_home'].replace('@', 1, inplace=True)
RB10.fillna(0, inplace=True)
RB10['opp_home'] = RB10['opp_home'].astype(int)

In [337]:
RB10.head(10)

Unnamed: 0,player,team,games,team_snaps_pct,rush_att_avg,rush_yds_avg,rush_td_avg,rec_tgt_avg,rec_avg,rec_yds_avg,rec_td_avg,DK_pt_avg,opp_home,opp,opp_rk,opp_DK_pt_avg,DK_rk
0,Najee Harris,PIT,8,0.8545,18.8,67.6,0.5,6.5,5.0,36.1,0.3,20.6,0,DET,31,31.2,1
1,Jonathan Taylor,IND,9,0.5864,15.6,91.2,0.9,3.0,2.6,32.6,0.1,22.5,0,JAX,17,23.3,2
2,Austin Ekeler,LAC,8,0.6582,12.6,59.9,0.6,5.6,4.5,40.6,0.4,21.1,0,MIN,20,25.9,3
3,Ezekiel Elliott,DAL,8,0.7026,16.0,77.8,0.6,3.6,2.9,19.1,0.1,18.1,0,ATL,25,27.6,5
4,Alvin Kamara,NOR,8,0.7838,18.3,66.3,0.4,5.5,4.0,38.8,0.5,20.5,1,TEN,8,21.3,4
5,Dalvin Cook,MIN,6,0.7024,19.2,92.3,0.3,3.8,2.5,14.2,0.0,16.5,1,LAC,30,28.4,7
6,Darrell Henderson,LAR,8,0.7529,15.1,70.3,0.6,3.5,2.4,18.0,0.3,16.5,1,SFO,20,25.8,8
7,Aaron Jones,GNB,9,0.6503,12.9,57.3,0.3,4.6,3.7,26.3,0.4,16.9,0,SEA,29,31.0,6
8,Leonard Fournette,TAM,8,0.5907,12.6,54.9,0.5,4.9,3.8,29.9,0.0,15.2,1,WAS,19,23.8,9
9,Melvin Gordon,DEN,9,0.5453,12.1,53.0,0.4,2.7,2.2,18.1,0.2,13.6,0,PHI,27,29.3,11


---

In [338]:
TE10.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72 entries, 0 to 71
Data columns (total 14 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   player          72 non-null     object 
 1   team            72 non-null     object 
 2   games           72 non-null     int64  
 3   team_snaps_pct  72 non-null     object 
 4   rec_tgt_avg     72 non-null     float64
 5   rec_avg         72 non-null     float64
 6   rec_yds_avg     72 non-null     float64
 7   rec_td_avg      72 non-null     float64
 8   DK_pt_avg       72 non-null     float64
 9   opp_home        37 non-null     object 
 10  opp             72 non-null     object 
 11  opp_rk          72 non-null     int64  
 12  opp_DK_pt_avg   72 non-null     float64
 13  DK_rk           72 non-null     int64  
dtypes: float64(6), int64(3), object(5)
memory usage: 8.0+ KB


In [339]:
TE10.head(1)

Unnamed: 0,player,team,games,team_snaps_pct,rec_tgt_avg,rec_avg,rec_yds_avg,rec_td_avg,DK_pt_avg,opp_home,opp,opp_rk,opp_DK_pt_avg,DK_rk
0,Travis Kelce,KAN,9,552 (85.19%),8.8,6.0,69.8,0.6,16.9,@,LVR,27,17.1,1


In [340]:
# I may want to consider snap count for TEs, although this may just be a fun exercise
# pulled this from SO
# first get rid of the special characters '(','%',')', then get rid of the leading numbers, then convert to float and divide by 100
TE10['team_snaps_pct'] = TE10['team_snaps_pct'].str.replace('[(,%,)]', '') 
TE10['team_snaps_pct'] = TE10['team_snaps_pct'].str.split().str[1]
TE10['team_snaps_pct'] = TE10['team_snaps_pct'].astype(float) / 100

  TE10['team_snaps_pct'] = TE10['team_snaps_pct'].str.replace('[(,%,)]', '')


In [341]:
TE10.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72 entries, 0 to 71
Data columns (total 14 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   player          72 non-null     object 
 1   team            72 non-null     object 
 2   games           72 non-null     int64  
 3   team_snaps_pct  72 non-null     float64
 4   rec_tgt_avg     72 non-null     float64
 5   rec_avg         72 non-null     float64
 6   rec_yds_avg     72 non-null     float64
 7   rec_td_avg      72 non-null     float64
 8   DK_pt_avg       72 non-null     float64
 9   opp_home        37 non-null     object 
 10  opp             72 non-null     object 
 11  opp_rk          72 non-null     int64  
 12  opp_DK_pt_avg   72 non-null     float64
 13  DK_rk           72 non-null     int64  
dtypes: float64(7), int64(3), object(4)
memory usage: 8.0+ KB


In [342]:
TE10.isnull().sum()

player             0
team               0
games              0
team_snaps_pct     0
rec_tgt_avg        0
rec_avg            0
rec_yds_avg        0
rec_td_avg         0
DK_pt_avg          0
opp_home          35
opp                0
opp_rk             0
opp_DK_pt_avg      0
DK_rk              0
dtype: int64

In [343]:
# for opponent location change '@' to a 1, then all NaN to 0.
TE10['opp_home'].replace('@', 1, inplace=True)
TE10.fillna(0, inplace=True)
TE10['opp_home'] = TE10['opp_home'].astype(int)

In [344]:
TE10.head(10)

Unnamed: 0,player,team,games,team_snaps_pct,rec_tgt_avg,rec_avg,rec_yds_avg,rec_td_avg,DK_pt_avg,opp_home,opp,opp_rk,opp_DK_pt_avg,DK_rk
0,Travis Kelce,KAN,9,0.8519,8.8,6.0,69.8,0.6,16.9,1,LVR,27,17.1,1
1,Mark Andrews,BAL,8,0.7211,7.6,5.3,70.0,0.4,15.8,1,MIA,19,14.1,2
2,Rob Gronkowski,TAM,4,0.6069,5.5,4.0,46.0,1.0,14.6,1,WAS,19,14.1,7
3,Dawson Knox,BUF,6,0.777,4.5,3.5,47.7,0.8,14.1,1,NYJ,25,15.5,6
4,Darren Waller,LVR,7,0.8926,9.1,5.7,67.1,0.3,14.6,0,KAN,29,16.5,3
5,Mike Gesicki,MIA,9,0.7219,7.0,4.9,58.8,0.2,12.7,0,BAL,31,19.2,5
6,Kyle Pitts,ATL,8,0.7421,7.1,4.5,68.3,0.1,12.8,1,DAL,23,14.1,9
7,Dalton Schultz,DAL,8,0.7744,6.1,4.6,53.0,0.4,12.2,0,ATL,17,13.1,10
8,T.J. Hockenson,DET,8,0.8275,8.0,6.0,56.0,0.3,13.4,1,PIT,13,12.0,4
9,George Kittle,SFO,5,0.9242,7.2,5.0,65.6,0.2,13.3,0,LAR,21,14.4,8


---

In [345]:
WR10.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 149 entries, 0 to 148
Data columns (total 14 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   player          149 non-null    object 
 1   team            149 non-null    object 
 2   games           149 non-null    int64  
 3   team_snaps_pct  149 non-null    object 
 4   rec_tgt_avg     149 non-null    float64
 5   rec_avg         149 non-null    float64
 6   rec_yds_avg     149 non-null    float64
 7   rec_td_avg      149 non-null    float64
 8   DK_pt_avg       149 non-null    float64
 9   opp_home        73 non-null     object 
 10  opp             149 non-null    object 
 11  opp_rk          149 non-null    int64  
 12  opp_DK_pt_avg   149 non-null    float64
 13  DK_rk           149 non-null    int64  
dtypes: float64(6), int64(3), object(5)
memory usage: 16.4+ KB


In [346]:
WR10.head(1)

Unnamed: 0,player,team,games,team_snaps_pct,rec_tgt_avg,rec_avg,rec_yds_avg,rec_td_avg,DK_pt_avg,opp_home,opp,opp_rk,opp_DK_pt_avg,DK_rk
0,Cooper Kupp,LAR,9,539 (91.82%),11.4,8.2,113.2,1.1,27.8,@,SFO,9,35.0,1


In [347]:
# I may want to consider snap count for WRs, although this may just be a fun exercise
# pulled this from SO
# first get rid of the special characters '(','%',')', then get rid of the leading numbers, then convert to float and divide by 100
WR10['team_snaps_pct'] = WR10['team_snaps_pct'].str.replace('[(,%,)]', '') 
WR10['team_snaps_pct'] = WR10['team_snaps_pct'].str.split().str[1]
WR10['team_snaps_pct'] = WR10['team_snaps_pct'].astype(float) / 100

  WR10['team_snaps_pct'] = WR10['team_snaps_pct'].str.replace('[(,%,)]', '')


In [348]:
WR10.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 149 entries, 0 to 148
Data columns (total 14 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   player          149 non-null    object 
 1   team            149 non-null    object 
 2   games           149 non-null    int64  
 3   team_snaps_pct  149 non-null    float64
 4   rec_tgt_avg     149 non-null    float64
 5   rec_avg         149 non-null    float64
 6   rec_yds_avg     149 non-null    float64
 7   rec_td_avg      149 non-null    float64
 8   DK_pt_avg       149 non-null    float64
 9   opp_home        73 non-null     object 
 10  opp             149 non-null    object 
 11  opp_rk          149 non-null    int64  
 12  opp_DK_pt_avg   149 non-null    float64
 13  DK_rk           149 non-null    int64  
dtypes: float64(7), int64(3), object(4)
memory usage: 16.4+ KB


In [349]:
WR10.isnull().sum()

player             0
team               0
games              0
team_snaps_pct     0
rec_tgt_avg        0
rec_avg            0
rec_yds_avg        0
rec_td_avg         0
DK_pt_avg          0
opp_home          76
opp                0
opp_rk             0
opp_DK_pt_avg      0
DK_rk              0
dtype: int64

In [350]:
# for opponent location change '@' to a 1, then all NaN to 0.
WR10['opp_home'].replace('@', 1, inplace=True)
WR10.fillna(0, inplace=True)
WR10['opp_home'] = WR10['opp_home'].astype(int)

In [351]:
WR10.head(10)

Unnamed: 0,player,team,games,team_snaps_pct,rec_tgt_avg,rec_avg,rec_yds_avg,rec_td_avg,DK_pt_avg,opp_home,opp,opp_rk,opp_DK_pt_avg,DK_rk
0,Cooper Kupp,LAR,9,0.9182,11.4,8.2,113.2,1.1,27.8,1,SFO,9,35.0,1
1,Deebo Samuel,SFO,8,0.8481,10.1,6.1,110.3,0.5,22.4,0,LAR,8,36.5,2
2,Marquise Brown,BAL,8,0.7585,8.6,5.8,85.3,0.8,20.0,1,MIA,28,43.7,4
3,Davante Adams,GNB,8,0.8617,10.9,7.3,98.3,0.4,20.5,0,SEA,12,37.1,3
4,D.K. Metcalf,SEA,8,0.8217,7.0,4.9,72.5,1.0,18.5,1,GNB,6,32.2,9
5,Chris Godwin,TAM,8,0.8932,8.6,6.3,82.5,0.5,19.3,1,WAS,31,43.8,7
6,Tyreek Hill,KAN,9,0.7824,11.2,7.6,85.8,0.7,21.3,1,LVR,3,28.4,5
7,Mike Evans,TAM,8,0.8523,7.9,4.9,68.0,1.0,18.4,1,WAS,31,43.8,14
8,Michael Pittman Jr.,IND,9,0.9153,7.9,5.6,73.1,0.6,17.0,0,JAX,22,41.8,8
9,Antonio Brown,TAM,5,0.5324,8.4,5.8,83.6,0.8,20.3,1,WAS,31,43.8,13


---

In [374]:
# resave these three dataframes
QB10.to_csv('../data/week_10_QB.csv')
RB10.to_csv('../data/week_10_RB.csv')
TE10.to_csv('../data/week_10_TE.csv')
WR10.to_csv('../data/week_10_WR.csv')

## Read in the DK total rankings for 9 weeks this season data set and clean for modeling
---

In [290]:
# read in data set
DK_tot = pd.read_csv('../data/DK_total_rank.csv')

In [285]:
DK_tot.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 27 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   player      500 non-null    object 
 1   team        500 non-null    object 
 2   pos         500 non-null    object 
 3   games       500 non-null    int64  
 4   gstart      500 non-null    int64  
 5   pass_cmp    500 non-null    int64  
 6   pass_att    500 non-null    int64  
 7   pass_yds    500 non-null    int64  
 8   pass_td     500 non-null    int64  
 9   pass_int    500 non-null    int64  
 10  rush_att    500 non-null    int64  
 11  rush_yds    500 non-null    int64  
 12  rush_yda    281 non-null    float64
 13  rush_td     500 non-null    int64  
 14  rec_tgt     500 non-null    int64  
 15  rec         500 non-null    int64  
 16  rec_yds     500 non-null    int64  
 17  rec_ydr     418 non-null    float64
 18  rec_td      500 non-null    int64  
 19  fumb        500 non-null    i

In [286]:
DK_tot.isnull().sum()

player          0
team            0
pos             0
games           0
gstart          0
pass_cmp        0
pass_att        0
pass_yds        0
pass_td         0
pass_int        0
rush_att        0
rush_yds        0
rush_yda      219
rush_td         0
rec_tgt         0
rec             0
rec_yds         0
rec_ydr        82
rec_td          0
fumb            0
fumb_loss       0
tot_td          0
tot_two_pt      0
DK_pt           0
VBD           425
pos_rk          0
ovr_rk        423
dtype: int64

In [287]:
DK_tot.fillna(0, inplace=True)

In [288]:
DK_tot.head(1)

Unnamed: 0,player,team,pos,games,gstart,pass_cmp,pass_att,pass_yds,pass_td,pass_int,...,rec_ydr,rec_td,fumb,fumb_loss,tot_td,tot_two_pt,DK_pt,VBD,pos_rk,ovr_rk
0,Cooper Kupp,LAR,WR,9,9,0,0,0,0,0,...,13.77,10,0,0,10,0,238.4,95.0,1,2.0


In [289]:
# resave this dataframe
DK_tot.to_csv('../data/DK_total_rank.csv')

## Read in the DK weekly contest results data sets and clean for modeling
---

In [355]:
# read in data sets
DK_wk2 = pd.read_csv('../data/DKresults_week_2.csv', index_col=[0])
DK_wk3 = pd.read_csv('../data/DKresults_week_3.csv', index_col=[0])
DK_wk4 = pd.read_csv('../data/DKresults_week_4.csv', index_col=[0])
DK_wk5 = pd.read_csv('../data/DKresults_week_5.csv', index_col=[0])
DK_wk6 = pd.read_csv('../data/DKresults_week_6.csv', index_col=[0])
DK_wk7 = pd.read_csv('../data/DKresults_week_7.csv', index_col=[0])
DK_wk8 = pd.read_csv('../data/DKresults_week_8.csv', index_col=[0])
DK_wk9 = pd.read_csv('../data/DKresults_week_9.csv', index_col=[0])
DK_wk10 = pd.read_csv('../data/DKresults_week_10.csv', index_col=[0])

In [356]:
DK_wk10.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 417 entries, 0 to 416
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   player     417 non-null    object 
 1   pos        417 non-null    object 
 2   draft_pct  417 non-null    float64
 3   DK_pt      417 non-null    float64
dtypes: float64(2), object(2)
memory usage: 16.3+ KB


In [357]:
DK_wk10.head(1)

Unnamed: 0,player,pos,draft_pct,DK_pt
0,D'Ernest Johnson,RB,0.4843,22.7


In [252]:
# knowing that all data sets are the same, do this all together
# pulled this from SO, although I think we covered this in a breakfast hour talk?
DK_wk2['draft_pct'] = DK_wk2['draft_pct'].str.rstrip('%').astype(float) / 100
DK_wk3['draft_pct'] = DK_wk3['draft_pct'].str.rstrip('%').astype(float) / 100
DK_wk4['draft_pct'] = DK_wk4['draft_pct'].str.rstrip('%').astype(float) / 100
DK_wk5['draft_pct'] = DK_wk5['draft_pct'].str.rstrip('%').astype(float) / 100
DK_wk6['draft_pct'] = DK_wk6['draft_pct'].str.rstrip('%').astype(float) / 100
DK_wk7['draft_pct'] = DK_wk7['draft_pct'].str.rstrip('%').astype(float) / 100
DK_wk8['draft_pct'] = DK_wk8['draft_pct'].str.rstrip('%').astype(float) / 100
DK_wk9['draft_pct'] = DK_wk9['draft_pct'].str.rstrip('%').astype(float) / 100
DK_wk10['draft_pct'] = DK_wk10['draft_pct'].str.rstrip('%').astype(float) / 100

In [358]:
DK_wk2.isnull().sum()

player       0
pos          0
draft_pct    0
DK_pt        0
dtype: int64

In [359]:
DK_wk3.isnull().sum()

player       0
pos          0
draft_pct    0
DK_pt        0
dtype: int64

In [360]:
DK_wk4.isnull().sum()

player       0
pos          0
draft_pct    0
DK_pt        0
dtype: int64

In [361]:
DK_wk5.isnull().sum()

player       0
pos          0
draft_pct    0
DK_pt        0
dtype: int64

In [362]:
DK_wk6.isnull().sum()

player       0
pos          0
draft_pct    0
DK_pt        0
dtype: int64

In [363]:
DK_wk7.isnull().sum()

player       0
pos          0
draft_pct    0
DK_pt        0
dtype: int64

In [364]:
DK_wk8.isnull().sum()

player       0
pos          0
draft_pct    0
DK_pt        0
dtype: int64

In [365]:
DK_wk9.isnull().sum()

player       0
pos          0
draft_pct    0
DK_pt        0
dtype: int64

In [366]:
DK_wk10.isnull().sum()

player       0
pos          0
draft_pct    0
DK_pt        0
dtype: int64

In [367]:
# resave these dataframes
DK_wk2.to_csv('../data/DKresults_week_2.csv')
DK_wk3.to_csv('../data/DKresults_week_3.csv')
DK_wk4.to_csv('../data/DKresults_week_4.csv')
DK_wk5.to_csv('../data/DKresults_week_5.csv')
DK_wk6.to_csv('../data/DKresults_week_6.csv')
DK_wk7.to_csv('../data/DKresults_week_7.csv')
DK_wk8.to_csv('../data/DKresults_week_8.csv')
DK_wk9.to_csv('../data/DKresults_week_9.csv')
DK_wk10.to_csv('../data/DKresults_week_10.csv')

## Read in team individual game stats data set and clean for modeling
---

In [408]:
# read in data sets
game = pd.read_csv('../data/team_game.csv', index_col=[0])

In [409]:
game

Unnamed: 0,team,opp_home,opp,week,slot,ot,pass_cmp,pass_att,pass_cmp_pct,pass_yds,...,third_cmp,third_pct,fourth_att,fourth_cmp,fourth_pct,off_result,off_score,opp_score,pass_pts,rush_pts
0,KAN,1.0,PHI,4,3,0.0,24,30,80.0,271,...,9,0.900,0,0,0.000,1,42,30,29.84,26.0
1,NOR,0.0,GNB,1,4,0.0,15,21,71.4,151,...,5,0.500,2,2,1.000,1,38,3,26.04,17.1
2,TAM,0.0,ATL,2,4,0.0,24,36,66.7,259,...,4,0.333,2,1,0.500,1,48,25,30.36,8.2
3,TAM,0.0,MIA,5,3,0.0,33,44,75.0,437,...,8,0.727,0,0,0.000,1,45,17,37.48,18.1
4,ATL,0.0,WAS,4,3,0.0,25,42,59.5,275,...,10,0.625,0,0,0.000,0,30,34,27.00,9.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
267,LVR,0.0,CHI,5,4,0.0,22,35,62.9,188,...,5,0.357,3,1,0.333,0,9,20,6.52,13.1
268,SFO,1.0,ARI,5,4,0.0,15,29,51.7,186,...,3,0.273,5,1,0.200,0,10,17,6.44,21.2
269,SFO,1.0,CHI,8,3,0.0,17,28,60.7,322,...,4,0.400,1,1,1.000,1,33,22,12.88,32.5
270,TAM,1.0,NWE,4,5,0.0,22,43,51.2,261,...,9,0.474,0,0,0.000,1,19,17,10.44,18.0


In [285]:
# convert 'off_time' which is time of possession to total seconds
game['off_time'] = game['off_time'].str.split(':')
game['off_time'] = game['off_time'].str[0].astype(int)*60 + game['off_time'].str[1].astype(int)
game.head()

Unnamed: 0,team,opp_home,opp,week,slot,result,ot,pass_cmp,pass_att,pass_cmp_pct,...,first,rush_first,pass_first,Pen,third_att,third_cmp,third_pct,fourth_att,fourth_cmp,fourth_pct
0,KAN,@,PHI,4,3,W 42-30,,24,30,80.0,...,31,12,16,3,10,9,90.00%,0,0,
1,NOR,,GNB,1,4,W 38-3,,15,21,71.4,...,22,11,10,1,10,5,50.00%,2,2,100.00%
2,TAM,,ATL,2,4,W 48-25,,24,36,66.7,...,21,5,14,2,12,4,33.30%,2,1,50.00%
3,TAM,,MIA,5,3,W 45-17,,33,44,75.0,...,33,5,26,2,11,8,72.70%,0,0,
4,ATL,,WAS,4,3,L 30-34,,25,42,59.5,...,24,6,15,3,16,10,62.50%,0,0,


In [286]:
# pulled this from SO, although I think we covered this in a breakfast hour talk?
# perform to the 2 relevant columns
game['third_pct'] = game['third_pct'].str.rstrip('%').astype(float) / 100
game['fourth_pct'] = game['fourth_pct'].str.rstrip('%').astype(float) / 100

In [287]:
game.head()

Unnamed: 0,team,opp_home,opp,week,slot,result,ot,pass_cmp,pass_att,pass_cmp_pct,...,first,rush_first,pass_first,Pen,third_att,third_cmp,third_pct,fourth_att,fourth_cmp,fourth_pct
0,KAN,@,PHI,4,3,W 42-30,,24,30,80.0,...,31,12,16,3,10,9,0.9,0,0,
1,NOR,,GNB,1,4,W 38-3,,15,21,71.4,...,22,11,10,1,10,5,0.5,2,2,1.0
2,TAM,,ATL,2,4,W 48-25,,24,36,66.7,...,21,5,14,2,12,4,0.333,2,1,0.5
3,TAM,,MIA,5,3,W 45-17,,33,44,75.0,...,33,5,26,2,11,8,0.727,0,0,
4,ATL,,WAS,4,3,L 30-34,,25,42,59.5,...,24,6,15,3,16,10,0.625,0,0,


In [288]:
game.isnull().sum()

team              0
opp_home        136
opp               0
week              0
slot              0
result            0
ot              246
pass_cmp          0
pass_att          0
pass_cmp_pct      0
pass_yds          0
pass_td           0
pass_int          0
sk                0
sk_yds            0
pass_rtg          0
rush_att          0
rush_yds          0
rush_yda          0
rush_td           0
tot_yds           0
plays             0
yds_play          0
to               78
off_time          0
first             0
rush_first        0
pass_first        0
Pen               0
third_att         0
third_cmp         0
third_pct         0
fourth_att        0
fourth_cmp        0
fourth_pct       78
dtype: int64

In [289]:
# for opponent location change '@' to a 1 and NaN to 0.
game['opp_home'].replace('@', 1, inplace=True)
game['opp_home'].fillna(0, inplace=True)

# do the same for overtime
game['ot'].replace('OT', 1, inplace=True)
game['ot'].fillna(0, inplace=True)

# take care of the rest of the nulls
game['to'].fillna(0, inplace=True)
game['fourth_pct'].fillna(0, inplace=True)

In [290]:
# break up the 'result' into a win/loss column for the offense, offense score, and defense score
game['off_result'] = game['result'].str.split().str[0]
game['off_score'] = game['result'].str.split().str[1]
game['off_score'] = game['off_score'].str.replace('-', ' ')
game['opp_score'] = game['off_score'].str.split().str[1].astype(int)
game['off_score'] = game['off_score'].str.split().str[0].astype(int)
# convert wins 'W' and losses 'L' to 1 and 0
game['off_result'] = game['off_result'].map({'W':1 ,'L':0})
game.drop(columns='result', inplace=True)
game.head()

Unnamed: 0,team,opp_home,opp,week,slot,ot,pass_cmp,pass_att,pass_cmp_pct,pass_yds,...,Pen,third_att,third_cmp,third_pct,fourth_att,fourth_cmp,fourth_pct,off_result,off_score,opp_score
0,KAN,1.0,PHI,4,3,0.0,24,30,80.0,271,...,3,10,9,0.9,0,0,0.0,1,42,30
1,NOR,0.0,GNB,1,4,0.0,15,21,71.4,151,...,1,10,5,0.5,2,2,1.0,1,38,3
2,TAM,0.0,ATL,2,4,0.0,24,36,66.7,259,...,2,12,4,0.333,2,1,0.5,1,48,25
3,TAM,0.0,MIA,5,3,0.0,33,44,75.0,437,...,2,11,8,0.727,0,0,0.0,1,45,17
4,ATL,0.0,WAS,4,3,0.0,25,42,59.5,275,...,3,16,10,0.625,0,0,0.0,0,30,34


In [291]:
game.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 272 entries, 0 to 271
Data columns (total 37 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   team          272 non-null    object 
 1   opp_home      272 non-null    float64
 2   opp           272 non-null    object 
 3   week          272 non-null    int64  
 4   slot          272 non-null    int64  
 5   ot            272 non-null    float64
 6   pass_cmp      272 non-null    int64  
 7   pass_att      272 non-null    int64  
 8   pass_cmp_pct  272 non-null    float64
 9   pass_yds      272 non-null    int64  
 10  pass_td       272 non-null    int64  
 11  pass_int      272 non-null    int64  
 12  sk            272 non-null    int64  
 13  sk_yds        272 non-null    int64  
 14  pass_rtg      272 non-null    float64
 15  rush_att      272 non-null    int64  
 16  rush_yds      272 non-null    int64  
 17  rush_yda      272 non-null    float64
 18  rush_td       272 non-null    

In [292]:
game.head()

Unnamed: 0,team,opp_home,opp,week,slot,ot,pass_cmp,pass_att,pass_cmp_pct,pass_yds,...,Pen,third_att,third_cmp,third_pct,fourth_att,fourth_cmp,fourth_pct,off_result,off_score,opp_score
0,KAN,1.0,PHI,4,3,0.0,24,30,80.0,271,...,3,10,9,0.9,0,0,0.0,1,42,30
1,NOR,0.0,GNB,1,4,0.0,15,21,71.4,151,...,1,10,5,0.5,2,2,1.0,1,38,3
2,TAM,0.0,ATL,2,4,0.0,24,36,66.7,259,...,2,12,4,0.333,2,1,0.5,1,48,25
3,TAM,0.0,MIA,5,3,0.0,33,44,75.0,437,...,2,11,8,0.727,0,0,0.0,1,45,17
4,ATL,0.0,WAS,4,3,0.0,25,42,59.5,275,...,3,16,10,0.625,0,0,0.0,0,30,34


In [293]:
# need to add fantasy points column for QB and RB
game['pass_pts'] = (game['pass_td']*4) + (game['pass_yds']*0.04) - game['pass_int']
game['rush_pts'] = game['rush_yds']*0.1 + game['rush_td']*6

In [294]:
game

Unnamed: 0,team,opp_home,opp,week,slot,ot,pass_cmp,pass_att,pass_cmp_pct,pass_yds,...,third_cmp,third_pct,fourth_att,fourth_cmp,fourth_pct,off_result,off_score,opp_score,pass_pts,rush_pts
0,KAN,1.0,PHI,4,3,0.0,24,30,80.0,271,...,9,0.900,0,0,0.000,1,42,30,29.84,26.0
1,NOR,0.0,GNB,1,4,0.0,15,21,71.4,151,...,5,0.500,2,2,1.000,1,38,3,26.04,17.1
2,TAM,0.0,ATL,2,4,0.0,24,36,66.7,259,...,4,0.333,2,1,0.500,1,48,25,30.36,8.2
3,TAM,0.0,MIA,5,3,0.0,33,44,75.0,437,...,8,0.727,0,0,0.000,1,45,17,37.48,18.1
4,ATL,0.0,WAS,4,3,0.0,25,42,59.5,275,...,10,0.625,0,0,0.000,0,30,34,27.00,9.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
267,LVR,0.0,CHI,5,4,0.0,22,35,62.9,188,...,5,0.357,3,1,0.333,0,9,20,6.52,13.1
268,SFO,1.0,ARI,5,4,0.0,15,29,51.7,186,...,3,0.273,5,1,0.200,0,10,17,6.44,21.2
269,SFO,1.0,CHI,8,3,0.0,17,28,60.7,322,...,4,0.400,1,1,1.000,1,33,22,12.88,32.5
270,TAM,1.0,NWE,4,5,0.0,22,43,51.2,261,...,9,0.474,0,0,0.000,1,19,17,10.44,18.0


In [295]:
# resave this dataframe
game.to_csv('../data/team_game.csv')

## Read in player individual game stats by week data set and clean for modeling
---

In [179]:
# read in data sets
qb_game = pd.read_csv('../data/QB_season_9.csv')

In [180]:
qb_game

Unnamed: 0,player,pos,team,opp_home,opp,result,game,week,day,pass_cmp,...,pass_yda,pass_ayda,dk_pt,rush_att,rush_yds,rush_td,rec,rec_yds,rec_td,fmb
0,Lamar Jackson,QB,BAL,,IND,W 31-25,5,5,Mon,37,...,10.28,12.14,45.9,14,62,0,0,0,0,1
1,Justin Herbert,QB,LAC,,CLE,W 47-42,5,5,Sun,26,...,9.26,11.12,45.8,4,29,1,0,0,0,0
2,Tom Brady,QB,TAM,,MIA,W 45-17,5,5,Sun,30,...,10.02,12.46,40.7,1,13,0,0,0,0,0
3,Josh Allen,QB,BUF,,WAS,W 43-21,3,3,Sun,32,...,8.33,10.19,40.2,4,9,1,0,0,0,0
4,Josh Allen,QB,BUF,@,KAN,W 38-20,5,5,Sun,15,...,12.12,14.42,39.5,11,59,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
301,Taysom Hill,QB,NOR,,GNB,W 38-3,1,1,Sun,1,...,3.00,3.00,0.2,2,1,0,0,0,0,0
302,Case Keenum,QB,CLE,,ARI,L 14-37,6,6,Sun,1,...,2.00,2.00,0.2,0,0,0,0,0,0,0
303,John Wolford,QB,LAR,@,HOU,W 38-22,8,8,Sun,1,...,2.50,2.50,0.2,0,0,0,0,0,0,0
304,Jacob Eason,QB,IND,,LAR,L 24-27,2,2,Sun,2,...,5.00,-4.00,,0,0,0,0,0,0,0


In [181]:
qb_game.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 306 entries, 0 to 305
Data columns (total 28 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   player     306 non-null    object 
 1   pos        306 non-null    object 
 2   team       306 non-null    object 
 3   opp_home   150 non-null    object 
 4   opp        306 non-null    object 
 5   result     306 non-null    object 
 6   game       306 non-null    int64  
 7   week       306 non-null    int64  
 8   day        306 non-null    object 
 9   pass_cmp   306 non-null    int64  
 10  pass_att   306 non-null    int64  
 11  pass_pct   306 non-null    float64
 12  pass_yds   306 non-null    int64  
 13  pass_td    306 non-null    int64  
 14  pass_int   306 non-null    int64  
 15  pass_rtg   306 non-null    float64
 16  sk         306 non-null    int64  
 17  sk_yds     306 non-null    int64  
 18  pass_yda   306 non-null    float64
 19  pass_ayda  306 non-null    float64
 20  dk_pt     

In [182]:
qb_game.isnull().sum()

player         0
pos            0
team           0
opp_home     156
opp            0
result         0
game           0
week           0
day            0
pass_cmp       0
pass_att       0
pass_pct       0
pass_yds       0
pass_td        0
pass_int       0
pass_rtg       0
sk             0
sk_yds         0
pass_yda       0
pass_ayda      0
dk_pt          2
rush_att       0
rush_yds       0
rush_td        0
rec            0
rec_yds        0
rec_td         0
fmb            0
dtype: int64

In [183]:
# for opponent location change '@' to a 1 and NaN to 0.
qb_game['opp_home'].replace('@', 1, inplace=True)
qb_game['opp_home'].fillna(0, inplace=True)

# convert NaN to 0
qb_game['dk_pt'].fillna(0, inplace=True)

In [184]:
# break up the 'result' into a win/loss column for the offense, offense score, and defense score
# convert wins 'W' and losses 'L' to 1 and 0
qb_game['team_win'] = qb_game['result'].str.split().str[0]
qb_game['team_win'] = qb_game['team_win'].map({'W':1 ,'L':0})
qb_game['team_score'] = qb_game['result'].str.split().str[1]
qb_game['team_score'] = qb_game['team_score'].str.replace('-', ' ')
qb_game['opp_score'] = qb_game['team_score'].str.split().str[1].astype(int)
qb_game['team_score'] = qb_game['team_score'].str.split().str[0].astype(int)
qb_game.drop(columns=['result'], inplace=True)
qb_game.head()

Unnamed: 0,player,pos,team,opp_home,opp,game,week,day,pass_cmp,pass_att,...,rush_att,rush_yds,rush_td,rec,rec_yds,rec_td,fmb,team_win,team_score,opp_score
0,Lamar Jackson,QB,BAL,0.0,IND,5,5,Mon,37,43,...,14,62,0,0,0,0,1,1,31,25
1,Justin Herbert,QB,LAC,0.0,CLE,5,5,Sun,26,43,...,4,29,1,0,0,0,0,1,47,42
2,Tom Brady,QB,TAM,0.0,MIA,5,5,Sun,30,41,...,1,13,0,0,0,0,0,1,45,17
3,Josh Allen,QB,BUF,0.0,WAS,3,3,Sun,32,43,...,4,9,1,0,0,0,0,1,43,21
4,Josh Allen,QB,BUF,1.0,KAN,5,5,Sun,15,26,...,11,59,1,0,0,0,0,1,38,20


In [186]:
# resave this dataframe
qb_game.to_csv('../data/QB_season_9.csv')

---

In [187]:
# read in data sets
rb_game = pd.read_csv('../data/RB_season_9.csv')

In [188]:
rb_game

Unnamed: 0,player,pos,team,opp_home,opp,result,game,week,day,rush_att,...,rec_td,rec_pct,rec_ydtgt,dk_pt,pass_cmp,pass_att,pass_yds,pass_td,pass_int,fmb
0,Derrick Henry,RB,TEN,@,SEA,W 33-30,2,2,Sun,35,...,0,100.00%,9.17,50.7,0,0,0,0,0,0
1,Aaron Jones,RB,GNB,,DET,W 35-17,2,2,Mon,17,...,3,100.00%,8.00,41.5,0,0,0,0,0,0
2,James Conner,RB,ARI,@,SFO,W 31-17,9,9,Sun,21,...,1,100.00%,15.40,40.3,0,0,0,0,0,1
3,Derrick Henry,RB,TEN,,BUF,W 34-31,6,6,Mon,20,...,0,66.70%,4.33,38.6,0,0,0,0,0,0
4,Jonathan Taylor,RB,IND,,NYJ,W 45-30,9,9,Thu,19,...,0,100.00%,14.00,37.0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
656,Peyton Barber,RB,LVR,@,LAC,L 14-28,4,4,Mon,1,...,0,0.00%,,,0,0,0,0,0,0
657,Corey Clement,RB,DAL,,PHI,W 41-21,3,3,Mon,3,...,0,0.00%,,,0,0,0,0,0,0
658,Justin Jackson,RB,LAC,@,KAN,W 30-24,3,3,Sun,2,...,0,0.00%,,,0,0,0,0,0,0
659,Benny Snell Jr.,RB,PIT,@,GNB,L 17-27,4,4,Sun,1,...,0,0.00%,0.00,,0,0,0,0,0,0


In [189]:
rb_game.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 661 entries, 0 to 660
Data columns (total 27 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   player     661 non-null    object 
 1   pos        661 non-null    object 
 2   team       661 non-null    object 
 3   opp_home   321 non-null    object 
 4   opp        661 non-null    object 
 5   result     661 non-null    object 
 6   game       661 non-null    int64  
 7   week       661 non-null    int64  
 8   day        661 non-null    object 
 9   rush_att   661 non-null    int64  
 10  rush_yds   661 non-null    int64  
 11  rush_yda   661 non-null    float64
 12  rush_td    661 non-null    int64  
 13  rec_tgt    661 non-null    int64  
 14  rec        661 non-null    int64  
 15  rec_yds    661 non-null    int64  
 16  rec_ydr    467 non-null    float64
 17  rec_td     661 non-null    int64  
 18  rec_pct    661 non-null    object 
 19  rec_ydtgt  498 non-null    float64
 20  dk_pt     

In [190]:
rb_game.isnull().sum()

player         0
pos            0
team           0
opp_home     340
opp            0
result         0
game           0
week           0
day            0
rush_att       0
rush_yds       0
rush_yda       0
rush_td        0
rec_tgt        0
rec            0
rec_yds        0
rec_ydr      194
rec_td         0
rec_pct        0
rec_ydtgt    163
dk_pt          6
pass_cmp       0
pass_att       0
pass_yds       0
pass_td        0
pass_int       0
fmb            0
dtype: int64

In [191]:
# for opponent location change '@' to a 1 and NaN to 0.
rb_game['opp_home'].replace('@', 1, inplace=True)
rb_game['opp_home'].fillna(0, inplace=True)

# convert NaN to 0
rb_game['dk_pt'].fillna(0, inplace=True)
rb_game['rec_ydr'].fillna(0, inplace=True)
rb_game['rec_ydtgt'].fillna(0, inplace=True)

In [192]:
# break up the 'result' into a win/loss column for the offense, offense score, and defense score
# convert wins 'W' and losses 'L' to 1 and 0
rb_game['team_win'] = rb_game['result'].str.split().str[0]
rb_game['team_win'] = rb_game['team_win'].map({'W':1 ,'L':0})
rb_game['team_score'] = rb_game['result'].str.split().str[1]
rb_game['team_score'] = rb_game['team_score'].str.replace('-', ' ')
rb_game['opp_score'] = rb_game['team_score'].str.split().str[1].astype(int)
rb_game['team_score'] = rb_game['team_score'].str.split().str[0].astype(int)
rb_game.drop(columns=['result'], inplace=True)
rb_game.head()

Unnamed: 0,player,pos,team,opp_home,opp,game,week,day,rush_att,rush_yds,...,dk_pt,pass_cmp,pass_att,pass_yds,pass_td,pass_int,fmb,team_win,team_score,opp_score
0,Derrick Henry,RB,TEN,1.0,SEA,2,2,Sun,35,182,...,50.7,0,0,0,0,0,0,1,33,30
1,Aaron Jones,RB,GNB,0.0,DET,2,2,Mon,17,67,...,41.5,0,0,0,0,0,0,1,35,17
2,James Conner,RB,ARI,1.0,SFO,9,9,Sun,21,96,...,40.3,0,0,0,0,0,1,1,31,17
3,Derrick Henry,RB,TEN,0.0,BUF,6,6,Mon,20,143,...,38.6,0,0,0,0,0,0,1,34,31
4,Jonathan Taylor,RB,IND,0.0,NYJ,9,9,Thu,19,172,...,37.0,0,0,0,0,0,1,1,45,30


In [194]:
# resave this dataframe
rb_game.to_csv('../data/RB_season_9.csv')

---

In [208]:
# read in data sets
te_game = pd.read_csv('../data/TE_season_9.csv')

In [209]:
te_game

Unnamed: 0,player,pos,team,opp_home,opp,result,game,week,day,rec_tgt,...,dk_pt,pass_cmp,pass_att,pass_yds,pass_td,pass_int,rush_att,rush_yds,rush_td,fmb
0,Mark Andrews,TE,BAL,,IND,W 31-25,5,5,Mon,13,...,44.7,0,0,0,0,0,0,0,0,0
1,David Njoku,TE,CLE,@,LAC,L 42-47,5,5,Sun,7,...,30.9,0,0,0,0,0,0,0,0,0
2,Kyle Pitts,TE,ATL,,NYJ,W 27-20,5,5,Sun,10,...,29.9,0,0,0,0,0,0,0,0,0
3,Darren Waller,TE,LVR,,BAL,W 33-27,1,1,Mon,19,...,29.5,0,0,0,0,0,0,0,0,0
4,Rob Gronkowski,TE,TAM,,DAL,W 31-29,1,1,Thu,8,...,29.0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
536,Tommy Tremble,TE,CAR,@,DAL,L 28-36,4,4,Sun,1,...,,0,0,0,0,0,0,0,0,0
537,C.J. Uzomah,TE,CIN,@,PIT,W 24-10,3,3,Sun,1,...,,0,0,0,0,0,0,0,0,0
538,Trevon Wesco,TE,NYJ,,TEN,W 27-24,4,4,Sun,1,...,,0,0,0,0,0,0,0,0,0
539,Trevon Wesco,TE,NYJ,@,NWE,L 13-54,6,7,Sun,1,...,,0,0,0,0,0,0,0,0,0


In [210]:
te_game.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 541 entries, 0 to 540
Data columns (total 26 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   player     541 non-null    object 
 1   pos        541 non-null    object 
 2   team       541 non-null    object 
 3   opp_home   281 non-null    object 
 4   opp        541 non-null    object 
 5   result     541 non-null    object 
 6   game       541 non-null    int64  
 7   week       541 non-null    int64  
 8   day        541 non-null    object 
 9   rec_tgt    541 non-null    int64  
 10  rec        541 non-null    int64  
 11  rec_yds    541 non-null    int64  
 12  rec_ydr    484 non-null    float64
 13  rec_td     541 non-null    int64  
 14  rec_pct    541 non-null    object 
 15  rec_ydtgt  541 non-null    float64
 16  dk_pt      485 non-null    float64
 17  pass_cmp   541 non-null    int64  
 18  pass_att   541 non-null    int64  
 19  pass_yds   541 non-null    int64  
 20  pass_td   

In [211]:
te_game.isnull().sum()

player         0
pos            0
team           0
opp_home     260
opp            0
result         0
game           0
week           0
day            0
rec_tgt        0
rec            0
rec_yds        0
rec_ydr       57
rec_td         0
rec_pct        0
rec_ydtgt      0
dk_pt         56
pass_cmp       0
pass_att       0
pass_yds       0
pass_td        0
pass_int       0
rush_att       0
rush_yds       0
rush_td        0
fmb            0
dtype: int64

In [212]:
# for opponent location change '@' to a 1 and NaN to 0.
te_game['opp_home'].replace('@', 1, inplace=True)
te_game['opp_home'].fillna(0, inplace=True)

# convert NaN to 0
te_game['dk_pt'].fillna(0, inplace=True)
te_game['rec_ydr'].fillna(0, inplace=True)

In [213]:
# break up the 'result' into a win/loss column for the offense, offense score, and defense score
# convert wins 'W' and losses 'L' to 1 and 0
te_game['team_win'] = te_game['result'].str.split().str[0]
te_game['team_win'] = te_game['team_win'].map({'W':1 ,'L':0})
te_game['team_score'] = te_game['result'].str.split().str[1]
te_game['team_score'] = te_game['team_score'].str.replace('-', ' ')
te_game['opp_score'] = te_game['team_score'].str.split().str[1].astype(int)
te_game['team_score'] = te_game['team_score'].str.split().str[0].astype(int)
te_game.drop(columns=['result'], inplace=True)
te_game.head()

Unnamed: 0,player,pos,team,opp_home,opp,game,week,day,rec_tgt,rec,...,pass_yds,pass_td,pass_int,rush_att,rush_yds,rush_td,fmb,team_win,team_score,opp_score
0,Mark Andrews,TE,BAL,0.0,IND,5,5,Mon,13,11,...,0,0,0,0,0,0,0,1,31,25
1,David Njoku,TE,CLE,1.0,LAC,5,5,Sun,7,7,...,0,0,0,0,0,0,0,0,42,47
2,Kyle Pitts,TE,ATL,0.0,NYJ,5,5,Sun,10,9,...,0,0,0,0,0,0,0,1,27,20
3,Darren Waller,TE,LVR,0.0,BAL,1,1,Mon,19,10,...,0,0,0,0,0,0,0,1,33,27
4,Rob Gronkowski,TE,TAM,0.0,DAL,1,1,Thu,8,8,...,0,0,0,0,0,0,0,1,31,29


In [214]:
# resave this dataframe
te_game.to_csv('../data/TE_season_9.csv')

---

In [215]:
# read in data sets
wr_game = pd.read_csv('../data/WR_season_9.csv')

In [216]:
wr_game

Unnamed: 0,player,pos,team,opp_home,opp,result,game,week,day,rec_tgt,...,dk_pt,pass_cmp,pass_att,pass_yds,pass_td,pass_int,rush_att,rush_yds,rush_td,fmb
0,Tyreek Hill,WR,KAN,@,PHI,W 42-30,4,4,Sun,12,...,50.6,0,0,0,0,0,0,0,0,0
1,Amari Cooper,WR,DAL,@,TAM,L 29-31,1,1,Thu,17,...,41.9,0,0,0,0,0,0,0,0,0
2,Davante Adams,WR,GNB,@,CIN,W 25-22,5,5,Sun,16,...,40.6,0,0,0,0,0,0,0,0,0
3,Cooper Kupp,WR,LAR,,DET,W 28-19,7,7,Sun,13,...,40.6,0,0,0,0,0,0,0,0,0
4,Tyreek Hill,WR,KAN,,CLE,W 33-29,1,1,Sun,15,...,40.1,0,0,0,0,0,1,4,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1081,James Washington,WR,PIT,,LVR,L 17-26,2,2,Sun,1,...,,0,0,0,0,0,0,0,0,0
1082,Dede Westbrook,WR,MIN,@,BAL,L 31-34,8,9,Sun,1,...,,0,0,0,0,0,0,0,0,0
1083,Cody White,WR,PIT,,DEN,W 27-19,5,5,Sun,1,...,,0,0,0,0,0,0,0,0,0
1084,Kevin White,WR,NOR,@,SEA,W 13-10,6,7,Mon,2,...,,0,0,0,0,0,0,0,0,0


In [217]:
wr_game.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1086 entries, 0 to 1085
Data columns (total 26 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   player     1086 non-null   object 
 1   pos        1086 non-null   object 
 2   team       1086 non-null   object 
 3   opp_home   541 non-null    object 
 4   opp        1086 non-null   object 
 5   result     1086 non-null   object 
 6   game       1086 non-null   int64  
 7   week       1086 non-null   int64  
 8   day        1086 non-null   object 
 9   rec_tgt    1086 non-null   int64  
 10  rec        1086 non-null   int64  
 11  rec_yds    1086 non-null   int64  
 12  rec_ydr    984 non-null    float64
 13  rec_td     1086 non-null   int64  
 14  rec_pct    1086 non-null   object 
 15  rec_ydtgt  1086 non-null   float64
 16  dk_pt      993 non-null    float64
 17  pass_cmp   1086 non-null   int64  
 18  pass_att   1086 non-null   int64  
 19  pass_yds   1086 non-null   int64  
 20  pass_td 

In [218]:
wr_game.isnull().sum()

player         0
pos            0
team           0
opp_home     545
opp            0
result         0
game           0
week           0
day            0
rec_tgt        0
rec            0
rec_yds        0
rec_ydr      102
rec_td         0
rec_pct        0
rec_ydtgt      0
dk_pt         93
pass_cmp       0
pass_att       0
pass_yds       0
pass_td        0
pass_int       0
rush_att       0
rush_yds       0
rush_td        0
fmb            0
dtype: int64

In [219]:
# for opponent location change '@' to a 1 and NaN to 0.
wr_game['opp_home'].replace('@', 1, inplace=True)
wr_game['opp_home'].fillna(0, inplace=True)

# convert NaN to 0
wr_game['dk_pt'].fillna(0, inplace=True)
wr_game['rec_ydr'].fillna(0, inplace=True)

In [220]:
# break up the 'result' into a win/loss column for the offense, offense score, and defense score
# convert wins 'W' and losses 'L' to 1 and 0
wr_game['team_win'] = wr_game['result'].str.split().str[0]
wr_game['team_win'] = wr_game['team_win'].map({'W':1 ,'L':0})
wr_game['team_score'] = wr_game['result'].str.split().str[1]
wr_game['team_score'] = wr_game['team_score'].str.replace('-', ' ')
wr_game['opp_score'] = wr_game['team_score'].str.split().str[1].astype(int)
wr_game['team_score'] = wr_game['team_score'].str.split().str[0].astype(int)
wr_game.drop(columns=['result'], inplace=True)
wr_game.head()

Unnamed: 0,player,pos,team,opp_home,opp,game,week,day,rec_tgt,rec,...,pass_yds,pass_td,pass_int,rush_att,rush_yds,rush_td,fmb,team_win,team_score,opp_score
0,Tyreek Hill,WR,KAN,1.0,PHI,4,4,Sun,12,11,...,0,0,0,0,0,0,0,1,42,30
1,Amari Cooper,WR,DAL,1.0,TAM,1,1,Thu,17,13,...,0,0,0,0,0,0,0,0,29,31
2,Davante Adams,WR,GNB,1.0,CIN,5,5,Sun,16,11,...,0,0,0,0,0,0,0,1,25,22
3,Cooper Kupp,WR,LAR,0.0,DET,7,7,Sun,13,10,...,0,0,0,0,0,0,0,1,28,19
4,Tyreek Hill,WR,KAN,0.0,CLE,1,1,Sun,15,11,...,0,0,0,1,4,0,0,1,33,29


In [221]:
# resave this dataframe
wr_game.to_csv('../data/WR_season_9.csv')

---

## Read in player results for week 10 data set and clean for modeling
---

In [375]:
# read in data sets
qb_results = pd.read_csv('../data/week_10_QB_results.csv')

In [377]:
qb_results.head()

Unnamed: 0,player,team,opp_home,opp,result,week,pass_cmp,pass_att,pass_pct,pass_yds,...,pass_rtg,sk,sk_yds,pass_yda,pass_ayda,DK_pt,rush_att,rush_yds,rush_td,fmb
0,Patrick Mahomes,KAN,@,LVR,W 41-14,10,35,50,70.0,406,...,127.6,0,0,8.12,10.12,39.2,0,0,0,0
1,Dak Prescott,DAL,,ATL,W 43-3,10,24,31,77.42,296,...,127.9,0,0,9.55,10.84,26.3,2,5,1,0
2,Josh Allen,BUF,@,NYJ,W 45-17,10,21,28,75.0,366,...,125.6,2,16,13.07,12.89,24.9,2,3,0,0
3,Mac Jones,NWE,,CLE,W 45-7,10,19,23,82.61,198,...,142.1,2,15,8.61,11.22,19.9,0,0,0,0
4,Trevor Siemian,NOR,@,TEN,L 21-23,10,19,34,55.88,298,...,104.8,4,22,8.76,9.94,19.9,0,0,0,0


In [378]:
qb_results.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34 entries, 0 to 33
Data columns (total 22 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   player     34 non-null     object 
 1   team       34 non-null     object 
 2   opp_home   16 non-null     object 
 3   opp        34 non-null     object 
 4   result     34 non-null     object 
 5   week       34 non-null     int64  
 6   pass_cmp   34 non-null     int64  
 7   pass_att   34 non-null     int64  
 8   pass_pct   34 non-null     float64
 9   pass_yds   34 non-null     int64  
 10  pass_td    34 non-null     int64  
 11  pass_int   34 non-null     int64  
 12  pass_rtg   34 non-null     float64
 13  sk         34 non-null     int64  
 14  sk_yds     34 non-null     int64  
 15  pass_yda   34 non-null     float64
 16  pass_ayda  34 non-null     float64
 17  DK_pt      34 non-null     float64
 18  rush_att   34 non-null     int64  
 19  rush_yds   34 non-null     int64  
 20  rush_td    3

In [379]:
qb_results.isnull().sum()

player        0
team          0
opp_home     18
opp           0
result        0
week          0
pass_cmp      0
pass_att      0
pass_pct      0
pass_yds      0
pass_td       0
pass_int      0
pass_rtg      0
sk            0
sk_yds        0
pass_yda      0
pass_ayda     0
DK_pt         0
rush_att      0
rush_yds      0
rush_td       0
fmb           0
dtype: int64

In [380]:
# for opponent location change '@' to a 1 and NaN to 0.
qb_results['opp_home'].replace('@', 1, inplace=True)
qb_results['opp_home'].fillna(0, inplace=True)

In [381]:
# break up the 'result' into a win/loss column for the offense, offense score, and defense score
# convert wins 'W' and losses 'L' to 1 and 0
qb_results['team_win'] = qb_results['result'].str.split().str[0]
qb_results['team_win'] = qb_results['team_win'].map({'W':1 ,'L':0})
qb_results['team_score'] = qb_results['result'].str.split().str[1]
qb_results['team_score'] = qb_results['team_score'].str.replace('-', ' ')
qb_results['opp_score'] = qb_results['team_score'].str.split().str[1].astype(int)
qb_results['team_score'] = qb_results['team_score'].str.split().str[0].astype(int)
qb_results.drop(columns=['result'], inplace=True)
qb_results.head()

Unnamed: 0,player,team,opp_home,opp,week,pass_cmp,pass_att,pass_pct,pass_yds,pass_td,...,pass_yda,pass_ayda,DK_pt,rush_att,rush_yds,rush_td,fmb,team_win,team_score,opp_score
0,Patrick Mahomes,KAN,1.0,LVR,10,35,50,70.0,406,5,...,8.12,10.12,39.2,0,0,0,0,1.0,41,14
1,Dak Prescott,DAL,0.0,ATL,10,24,31,77.42,296,2,...,9.55,10.84,26.3,2,5,1,0,1.0,43,3
2,Josh Allen,BUF,1.0,NYJ,10,21,28,75.0,366,2,...,13.07,12.89,24.9,2,3,0,0,1.0,45,17
3,Mac Jones,NWE,0.0,CLE,10,19,23,82.61,198,3,...,8.61,11.22,19.9,0,0,0,0,1.0,45,7
4,Trevor Siemian,NOR,1.0,TEN,10,19,34,55.88,298,2,...,8.76,9.94,19.9,0,0,0,0,0.0,21,23


In [382]:
# resave this dataframe
qb_results.to_csv('../data/week_10_QB_results.csv')

---

In [383]:
# read in data sets
rb_results = pd.read_csv('../data/week_10_RB_results.csv')

In [384]:
rb_results.head()

Unnamed: 0,player,team,opp_home,opp,result,week,rush_att,rush_yds,rush_yda,rush_td,rec_tgt,rec,rec_yds,rec_ydr,rec_td,rec_pct,rec_ydtgt,DK_pt,fmb
0,Darrel Williams,KAN,@,LVR,W 41-14,10,11,43,3.91,0,9,9,101,11.22,1,100.00%,11.22,32.4,0
1,Rhamondre Stevenson,NWE,,CLE,W 45-7,10,20,100,5.0,2,5,4,14,3.5,0,80.00%,2.8,30.4,0
2,Jonathan Taylor,IND,,JAX,W 23-17,10,21,116,5.52,1,8,6,10,1.67,0,75.00%,1.25,27.6,0
3,AJ Dillon,GNB,,SEA,W 17-0,10,21,66,3.14,2,2,2,62,31.0,0,100.00%,31.0,26.8,0
4,Christian McCaffrey,CAR,@,ARI,W 34-10,10,13,95,7.31,0,10,10,66,6.6,0,100.00%,6.6,26.1,0


In [385]:
rb_results.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 79 entries, 0 to 78
Data columns (total 19 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   player     79 non-null     object 
 1   team       79 non-null     object 
 2   opp_home   40 non-null     object 
 3   opp        79 non-null     object 
 4   result     79 non-null     object 
 5   week       79 non-null     int64  
 6   rush_att   79 non-null     int64  
 7   rush_yds   79 non-null     int64  
 8   rush_yda   67 non-null     float64
 9   rush_td    79 non-null     int64  
 10  rec_tgt    79 non-null     int64  
 11  rec        79 non-null     int64  
 12  rec_yds    79 non-null     int64  
 13  rec_ydr    53 non-null     float64
 14  rec_td     79 non-null     int64  
 15  rec_pct    79 non-null     object 
 16  rec_ydtgt  56 non-null     float64
 17  DK_pt      71 non-null     float64
 18  fmb        79 non-null     int64  
dtypes: float64(4), int64(9), object(6)
memory usage: 11.

In [386]:
rb_results.isnull().sum()

player        0
team          0
opp_home     39
opp           0
result        0
week          0
rush_att      0
rush_yds      0
rush_yda     12
rush_td       0
rec_tgt       0
rec           0
rec_yds       0
rec_ydr      26
rec_td        0
rec_pct       0
rec_ydtgt    23
DK_pt         8
fmb           0
dtype: int64

In [387]:
# for opponent location change '@' to a 1 and NaN to 0.
rb_results['opp_home'].replace('@', 1, inplace=True)
rb_results['opp_home'].fillna(0, inplace=True)

# convert NaN to 0
rb_results['rush_yda'].fillna(0, inplace=True)
rb_results['rec_ydr'].fillna(0, inplace=True)
rb_results['rec_ydtgt'].fillna(0, inplace=True)
rb_results['DK_pt'].fillna(0, inplace=True)

In [388]:
# break up the 'result' into a win/loss column for the offense, offense score, and defense score
# convert wins 'W' and losses 'L' to 1 and 0
rb_results['team_win'] = rb_results['result'].str.split().str[0]
rb_results['team_win'] = rb_results['team_win'].map({'W':1 ,'L':0})
rb_results['team_score'] = rb_results['result'].str.split().str[1]
rb_results['team_score'] = rb_results['team_score'].str.replace('-', ' ')
rb_results['opp_score'] = rb_results['team_score'].str.split().str[1].astype(int)
rb_results['team_score'] = rb_results['team_score'].str.split().str[0].astype(int)
rb_results.drop(columns=['result'], inplace=True)
rb_results.head()

Unnamed: 0,player,team,opp_home,opp,week,rush_att,rush_yds,rush_yda,rush_td,rec_tgt,...,rec_yds,rec_ydr,rec_td,rec_pct,rec_ydtgt,DK_pt,fmb,team_win,team_score,opp_score
0,Darrel Williams,KAN,1.0,LVR,10,11,43,3.91,0,9,...,101,11.22,1,100.00%,11.22,32.4,0,1.0,41,14
1,Rhamondre Stevenson,NWE,0.0,CLE,10,20,100,5.0,2,5,...,14,3.5,0,80.00%,2.8,30.4,0,1.0,45,7
2,Jonathan Taylor,IND,0.0,JAX,10,21,116,5.52,1,8,...,10,1.67,0,75.00%,1.25,27.6,0,1.0,23,17
3,AJ Dillon,GNB,0.0,SEA,10,21,66,3.14,2,2,...,62,31.0,0,100.00%,31.0,26.8,0,1.0,17,0
4,Christian McCaffrey,CAR,1.0,ARI,10,13,95,7.31,0,10,...,66,6.6,0,100.00%,6.6,26.1,0,1.0,34,10


In [390]:
# resave this dataframe
rb_results.to_csv('../data/week_10_RB_results.csv')

---

In [391]:
# read in data sets
te_results = pd.read_csv('../data/week_10_TE_results.csv')

In [392]:
te_results.head()

Unnamed: 0,player,team,opp_home,opp,result,week,rec_tgt,rec,rec_yds,rec_ydr,rec_td,rec_pct,rec_ydtgt,DK_pt,rush_att,rush_yds,rush_td,fmb
0,Travis Kelce,KAN,@,LVR,W 41-14,10,10,8,119,14.88,0,80.00%,11.9,22.9,0,0,0,0
1,Hunter Henry,NWE,,CLE,W 45-7,10,4,4,37,9.25,2,100.00%,9.25,19.7,0,0,0,0
2,Mark Andrews,BAL,@,MIA,L 10-22,10,8,6,63,10.5,1,75.00%,7.88,18.3,0,0,0,0
3,Tyler Conklin,MIN,@,LAC,W 27-20,10,5,3,11,3.67,2,60.00%,2.2,16.1,0,0,0,0
4,Gerald Everett,SEA,@,GNB,L 0-17,10,8,8,63,7.88,0,100.00%,7.88,14.3,0,0,0,0


In [393]:
te_results.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54 entries, 0 to 53
Data columns (total 18 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   player     54 non-null     object 
 1   team       54 non-null     object 
 2   opp_home   23 non-null     object 
 3   opp        54 non-null     object 
 4   result     54 non-null     object 
 5   week       54 non-null     int64  
 6   rec_tgt    54 non-null     int64  
 7   rec        54 non-null     int64  
 8   rec_yds    54 non-null     int64  
 9   rec_ydr    45 non-null     float64
 10  rec_td     54 non-null     int64  
 11  rec_pct    54 non-null     object 
 12  rec_ydtgt  48 non-null     float64
 13  DK_pt      46 non-null     float64
 14  rush_att   54 non-null     int64  
 15  rush_yds   54 non-null     int64  
 16  rush_td    54 non-null     int64  
 17  fmb        54 non-null     int64  
dtypes: float64(3), int64(9), object(6)
memory usage: 7.7+ KB


In [394]:
te_results.isnull().sum()

player        0
team          0
opp_home     31
opp           0
result        0
week          0
rec_tgt       0
rec           0
rec_yds       0
rec_ydr       9
rec_td        0
rec_pct       0
rec_ydtgt     6
DK_pt         8
rush_att      0
rush_yds      0
rush_td       0
fmb           0
dtype: int64

In [395]:
# for opponent location change '@' to a 1 and NaN to 0.
te_results['opp_home'].replace('@', 1, inplace=True)
te_results['opp_home'].fillna(0, inplace=True)

# convert NaN to 0
te_results['rec_ydr'].fillna(0, inplace=True)
te_results['rec_ydtgt'].fillna(0, inplace=True)
te_results['DK_pt'].fillna(0, inplace=True)

In [396]:
# break up the 'result' into a win/loss column for the offense, offense score, and defense score
# convert wins 'W' and losses 'L' to 1 and 0
te_results['team_win'] = te_results['result'].str.split().str[0]
te_results['team_win'] = te_results['team_win'].map({'W':1 ,'L':0})
te_results['team_score'] = te_results['result'].str.split().str[1]
te_results['team_score'] = te_results['team_score'].str.replace('-', ' ')
te_results['opp_score'] = te_results['team_score'].str.split().str[1].astype(int)
te_results['team_score'] = te_results['team_score'].str.split().str[0].astype(int)
te_results.drop(columns=['result'], inplace=True)
te_results.head()

Unnamed: 0,player,team,opp_home,opp,week,rec_tgt,rec,rec_yds,rec_ydr,rec_td,rec_pct,rec_ydtgt,DK_pt,rush_att,rush_yds,rush_td,fmb,team_win,team_score,opp_score
0,Travis Kelce,KAN,1.0,LVR,10,10,8,119,14.88,0,80.00%,11.9,22.9,0,0,0,0,1.0,41,14
1,Hunter Henry,NWE,0.0,CLE,10,4,4,37,9.25,2,100.00%,9.25,19.7,0,0,0,0,1.0,45,7
2,Mark Andrews,BAL,1.0,MIA,10,8,6,63,10.5,1,75.00%,7.88,18.3,0,0,0,0,0.0,10,22
3,Tyler Conklin,MIN,1.0,LAC,10,5,3,11,3.67,2,60.00%,2.2,16.1,0,0,0,0,1.0,27,20
4,Gerald Everett,SEA,1.0,GNB,10,8,8,63,7.88,0,100.00%,7.88,14.3,0,0,0,0,0.0,0,17


In [398]:
# resave this dataframe
te_results.to_csv('../data/week_10_TE_results.csv')

---

In [399]:
# read in data sets
wr_results = pd.read_csv('../data/week_10_WR_results.csv')

In [400]:
wr_results.head()

Unnamed: 0,player,team,opp_home,opp,result,week,rec_tgt,rec,rec_yds,rec_ydr,rec_td,rec_pct,rec_ydtgt,DK_pt,rush_att,rush_yds,rush_td,fmb
0,Stefon Diggs,BUF,@,NYJ,W 45-17,10,13,8,162,20.25,1,61.50%,12.46,33.2,0,0,0,0
1,CeeDee Lamb,DAL,,ATL,W 43-3,10,7,6,94,15.67,2,85.70%,13.43,28.6,1,12,0,0
2,Tyreek Hill,KAN,@,LVR,W 41-14,10,10,7,83,11.86,2,70.00%,8.3,27.5,1,2,0,1
3,Justin Jefferson,MIN,@,LAC,W 27-20,10,11,9,143,15.89,0,81.80%,13.0,25.9,1,-4,0,0
4,Kendrick Bourne,NWE,,CLE,W 45-7,10,4,4,98,24.5,1,100.00%,24.5,24.1,3,43,0,0


In [401]:
wr_results.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 18 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   player     100 non-null    object 
 1   team       100 non-null    object 
 2   opp_home   49 non-null     object 
 3   opp        100 non-null    object 
 4   result     100 non-null    object 
 5   week       100 non-null    int64  
 6   rec_tgt    100 non-null    int64  
 7   rec        100 non-null    int64  
 8   rec_yds    100 non-null    int64  
 9   rec_ydr    94 non-null     float64
 10  rec_td     100 non-null    int64  
 11  rec_pct    100 non-null    object 
 12  rec_ydtgt  98 non-null     float64
 13  DK_pt      97 non-null     float64
 14  rush_att   100 non-null    int64  
 15  rush_yds   100 non-null    int64  
 16  rush_td    100 non-null    int64  
 17  fmb        100 non-null    int64  
dtypes: float64(3), int64(9), object(6)
memory usage: 14.2+ KB


In [402]:
wr_results.isnull().sum()

player        0
team          0
opp_home     51
opp           0
result        0
week          0
rec_tgt       0
rec           0
rec_yds       0
rec_ydr       6
rec_td        0
rec_pct       0
rec_ydtgt     2
DK_pt         3
rush_att      0
rush_yds      0
rush_td       0
fmb           0
dtype: int64

In [403]:
# for opponent location change '@' to a 1 and NaN to 0.
wr_results['opp_home'].replace('@', 1, inplace=True)
wr_results['opp_home'].fillna(0, inplace=True)

# convert NaN to 0
wr_results['rec_ydr'].fillna(0, inplace=True)
wr_results['rec_ydtgt'].fillna(0, inplace=True)
wr_results['DK_pt'].fillna(0, inplace=True)

In [404]:
# break up the 'result' into a win/loss column for the offense, offense score, and defense score
# convert wins 'W' and losses 'L' to 1 and 0
wr_results['team_win'] = wr_results['result'].str.split().str[0]
wr_results['team_win'] = wr_results['team_win'].map({'W':1 ,'L':0})
wr_results['team_score'] = wr_results['result'].str.split().str[1]
wr_results['team_score'] = wr_results['team_score'].str.replace('-', ' ')
wr_results['opp_score'] = wr_results['team_score'].str.split().str[1].astype(int)
wr_results['team_score'] = wr_results['team_score'].str.split().str[0].astype(int)
wr_results.drop(columns=['result'], inplace=True)
wr_results.head()

Unnamed: 0,player,team,opp_home,opp,week,rec_tgt,rec,rec_yds,rec_ydr,rec_td,rec_pct,rec_ydtgt,DK_pt,rush_att,rush_yds,rush_td,fmb,team_win,team_score,opp_score
0,Stefon Diggs,BUF,1.0,NYJ,10,13,8,162,20.25,1,61.50%,12.46,33.2,0,0,0,0,1.0,45,17
1,CeeDee Lamb,DAL,0.0,ATL,10,7,6,94,15.67,2,85.70%,13.43,28.6,1,12,0,0,1.0,43,3
2,Tyreek Hill,KAN,1.0,LVR,10,10,7,83,11.86,2,70.00%,8.3,27.5,1,2,0,1,1.0,41,14
3,Justin Jefferson,MIN,1.0,LAC,10,11,9,143,15.89,0,81.80%,13.0,25.9,1,-4,0,0,1.0,27,20
4,Kendrick Bourne,NWE,0.0,CLE,10,4,4,98,24.5,1,100.00%,24.5,24.1,3,43,0,0,1.0,45,7


In [405]:
# resave this dataframe
wr_results.to_csv('../data/week_10_WR_results.csv')

---