In [1]:
import pandas as pd
import numpy as np
import sklearn

# Determining Baseline R^2 using just Average FPTS

In [2]:
scraped_df = pd.read_csv('ma10.csv')

x_baseline = scraped_df['FPTS10_MA'].values.reshape(-1, 1)
y_baseline = scraped_df['FPTS']

In [3]:
from sklearn.linear_model import LinearRegression

reg = LinearRegression()

from sklearn.model_selection import cross_val_score
scores = cross_val_score(reg, x_baseline, y_baseline, cv=4)
avg_R_squared = scores.mean()
print(scores)
avg_R_squared

[0.43602285 0.49512975 0.4811836  0.47346451]


0.4714501782590032

In [4]:
from sklearn.linear_model import Lasso

Lasso = Lasso()

scores = cross_val_score(Lasso, x_baseline, y_baseline, cv=4)
avg_R_squared = scores.mean()
print(scores)
avg_R_squared

[0.43625473 0.49490616 0.48086367 0.47358931]


0.47140346695423874

In [5]:
from sklearn.ensemble import RandomForestRegressor

for i in [3, 4, 5, 6]:
    ran_forrest = RandomForestRegressor(max_depth=i)
    scores = cross_val_score(ran_forrest, x_baseline, y_baseline, cv=4)
    print("Scores depth", i, ':', scores)
    avg_R_squared = scores.mean()
    print(avg_R_squared)

Scores depth 3 : [0.43357477 0.4821246  0.47605663 0.47177748]
0.4658833707573993
Scores depth 4 : [0.43550677 0.48797159 0.4789567  0.47321043]
0.46891137311768377
Scores depth 5 : [0.43413723 0.48459026 0.47927173 0.4709623 ]
0.46724037996323753
Scores depth 6 : [0.43409731 0.48465491 0.4786028  0.47074481]
0.46702495939879324


From these baseline models, we can see that the best out of sample R^2 using just previous FPTS to predict future FPTS is about .4714. We will try to construct models that perform better than this.

# Training the model for top 50 players

## Training a Linear Regression Model

### Splitting the data into X and y
#### Don't need train and test sets because using cross validation

In [6]:
# Import data to train the model

scraped_df = pd.read_csv('ma10.csv')

X = scraped_df.loc[:,['FPTS10_MA', 'PTS10_MA', 'AST10_MA', 'STL10_MA', 'BLK10_MA', 'DD10_MA', 'DT10_MA', 'MP10_MA', 'FGA10_MA', 'D_R']]
y = scraped_df['FPTS']

X

Unnamed: 0,FPTS10_MA,PTS10_MA,AST10_MA,STL10_MA,BLK10_MA,DD10_MA,DT10_MA,MP10_MA,FGA10_MA,D_R
0,44.450,22.5,6.2,1.8,0.4,0.30,0.2,36.330,15.0,109.2
1,45.925,22.8,6.7,2.0,0.3,0.30,0.2,36.256,14.5,109.2
2,44.825,23.9,5.7,1.9,0.4,0.30,0.1,35.692,14.7,112.6
3,41.800,21.4,5.7,1.8,0.5,0.35,0.1,35.322,14.0,108.1
4,39.575,20.0,5.3,1.9,0.4,0.30,0.0,35.113,13.2,112.6
...,...,...,...,...,...,...,...,...,...,...
10232,27.175,13.2,4.2,1.0,0.2,0.15,0.0,31.235,10.1,116.0
10233,25.725,12.2,4.0,1.0,0.2,0.15,0.0,30.764,9.5,116.0
10234,25.500,11.9,3.9,1.0,0.3,0.20,0.0,31.298,9.3,111.7
10235,26.350,11.9,4.4,1.1,0.3,0.20,0.0,31.474,9.7,111.7


### fitting the linear model

In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso


reg = LinearRegression()
Lasso = Lasso()

### Cacluating R^2 with 4-Fold Cross Validation

In [8]:
from sklearn.model_selection import cross_val_score
scores = cross_val_score(reg, X, y, cv=4)
avg_R_squared = scores.mean()
print(scores)
avg_R_squared

[0.44382352 0.49727393 0.48626308 0.48222697]


0.47739687672774067

### Testing LASSO

In [9]:
scores = cross_val_score(Lasso, X, y, cv=4)
avg_R_squared = scores.mean()
print(scores)
avg_R_squared

[0.43830434 0.49534387 0.48272585 0.47575517]


0.4730323093327961

## Training a Random Forrest Model

In [10]:
from sklearn.ensemble import RandomForestRegressor

for i in [3, 4, 5, 6]:
    ran_forrest = RandomForestRegressor(max_depth=i)
    scores = cross_val_score(ran_forrest, X, y, cv=4)
    print("Scores depth", i, ':', scores)
    avg_R_squared = scores.mean()
    print(avg_R_squared)


Scores depth 3 : [0.4330808  0.48237737 0.4749067  0.4717473 ]
0.4655280427984382
Scores depth 4 : [0.43519636 0.48277316 0.4792083  0.47347034]
0.46766203989362776
Scores depth 5 : [0.43529714 0.48418646 0.48058157 0.47357956]
0.46841118411098415
Scores depth 6 : [0.43493847 0.48285863 0.47855741 0.47198297]
0.46708437010694703


Based on 4-Fold Cross Validation, a simple linear model does the best job at predicting out-of-sample FPTS. Therefore, we will use this model to predict the FPTS of the top 50 fantasy players each day.

In [11]:
Top_50_Model = LinearRegression().fit(X, y)

# Training the model for the rest of the players

We are unable to scrape detailed data every day for players below the top 50 fantasy performers. Therefore, we will evaluate the effectiveness of a second model that only uses the two features available for all players: Average FPTS and D_R

In [12]:
X_simple = scraped_df.loc[:,['FPTS10_MA', 'D_R']]
y_simple = scraped_df['FPTS']

In [13]:
scores = cross_val_score(reg, X_simple, y_simple, cv=4)
avg_R_squared = scores.mean()
print(scores)
avg_R_squared

[0.43906753 0.49400228 0.48447102 0.47638676]


0.473481897495585

In [14]:
scores = cross_val_score(Lasso, X_simple, y_simple, cv=4)
avg_R_squared = scores.mean()
print(scores)
avg_R_squared

[0.43823821 0.49515513 0.48287207 0.47550573]


0.47294278473675555

In [15]:
for i in [3, 4, 5, 6]:
    ran_forrest = RandomForestRegressor(max_depth=i)
    scores = cross_val_score(ran_forrest, X_simple, y_simple, cv=4)
    print("Scores depth", i, ':', scores)
    avg_R_squared = scores.mean()
    print(avg_R_squared)

Scores depth 3 : [0.43281577 0.48228516 0.47596782 0.47241594]
0.4658711713385235
Scores depth 4 : [0.43610661 0.4837773  0.48087601 0.47440807]
0.46879199595869087
Scores depth 5 : [0.43399275 0.48350321 0.4814746  0.47374867]
0.4681798090179746
Scores depth 6 : [0.42689153 0.48170734 0.47968864 0.46986556]
0.46453826649764074


Once again, a simple linear regression was the strongest model for predicting out of sample in our simplified set of features. Therefore, we will again use this model for predicting the FPTS of the rest of players outside of the top 50.

In [16]:
Rest_of_Players_Model = LinearRegression().fit(X_simple, y_simple)

# Using the Models to Predict Performance

### Upload the data for eligible players from DraftKings.com

In [17]:
DK_Data = pd.read_csv('Final Project\DKSalaries.csv')
DK_Data['Game Info'].astype('string')

0      WAS@UTA 04/12/2021 09:00PM ET
1      PHI@DAL 04/12/2021 07:30PM ET
2      BKN@MIN 04/12/2021 08:00PM ET
3      BKN@MIN 04/12/2021 08:00PM ET
4       DEN@GS 04/12/2021 10:00PM ET
                   ...              
292    BKN@MIN 04/12/2021 08:00PM ET
293    BKN@MIN 04/12/2021 08:00PM ET
294    BKN@MIN 04/12/2021 08:00PM ET
295    BKN@MIN 04/12/2021 08:00PM ET
296    BKN@MIN 04/12/2021 08:00PM ET
Name: Game Info, Length: 297, dtype: string

In [18]:
DK_Data

Unnamed: 0,Position,Name + ID,Name,ID,Roster Position,Salary,Game Info,TeamAbbrev,AvgPointsPerGame
0,PG,Russell Westbrook (17028427),Russell Westbrook,17028427,PG/G/UTIL,11000,WAS@UTA 04/12/2021 09:00PM ET,WAS,55.26
1,PG/SF,Luka Doncic (17028430),Luka Doncic,17028430,PG/SF/F/G/UTIL,10700,PHI@DAL 04/12/2021 07:30PM ET,DAL,55.18
2,PG,James Harden (17028435),James Harden,17028435,PG/G/UTIL,10600,BKN@MIN 04/12/2021 08:00PM ET,BKN,56.62
3,C,Karl-Anthony Towns (17028438),Karl-Anthony Towns,17028438,C/UTIL,10500,BKN@MIN 04/12/2021 08:00PM ET,MIN,50.92
4,C,Nikola Jokic (17028440),Nikola Jokic,17028440,C/UTIL,10400,DEN@GS 04/12/2021 10:00PM ET,DEN,58.43
...,...,...,...,...,...,...,...,...,...
292,C,Ed Davis (17029202),Ed Davis,17029202,C/UTIL,3000,BKN@MIN 04/12/2021 08:00PM ET,MIN,12.09
293,PF/C,Jarred Vanderbilt (17029204),Jarred Vanderbilt,17029204,PF/C/F/UTIL,3000,BKN@MIN 04/12/2021 08:00PM ET,MIN,16.41
294,SG,Spencer Dinwiddie (17029324),Spencer Dinwiddie,17029324,SG/G/UTIL,3000,BKN@MIN 04/12/2021 08:00PM ET,BKN,18.08
295,PG,Chris Chiozza (17029327),Chris Chiozza,17029327,PG/G/UTIL,3000,BKN@MIN 04/12/2021 08:00PM ET,BKN,10.74


## Adding Opponent Defensive Rating data from another dataset

Extract the Opponent from the Game Info string by comparing it to the player's team in TeamAbbrev

In [19]:
# Create new Away and Home columns

DK_Data['Away'] = DK_Data['Game Info'].str.split('@').str[0]
DK_Data['Home'] = DK_Data['Game Info'].str.split('@').str[1].str[:3]
DK_Data

Unnamed: 0,Position,Name + ID,Name,ID,Roster Position,Salary,Game Info,TeamAbbrev,AvgPointsPerGame,Away,Home
0,PG,Russell Westbrook (17028427),Russell Westbrook,17028427,PG/G/UTIL,11000,WAS@UTA 04/12/2021 09:00PM ET,WAS,55.26,WAS,UTA
1,PG/SF,Luka Doncic (17028430),Luka Doncic,17028430,PG/SF/F/G/UTIL,10700,PHI@DAL 04/12/2021 07:30PM ET,DAL,55.18,PHI,DAL
2,PG,James Harden (17028435),James Harden,17028435,PG/G/UTIL,10600,BKN@MIN 04/12/2021 08:00PM ET,BKN,56.62,BKN,MIN
3,C,Karl-Anthony Towns (17028438),Karl-Anthony Towns,17028438,C/UTIL,10500,BKN@MIN 04/12/2021 08:00PM ET,MIN,50.92,BKN,MIN
4,C,Nikola Jokic (17028440),Nikola Jokic,17028440,C/UTIL,10400,DEN@GS 04/12/2021 10:00PM ET,DEN,58.43,DEN,GS
...,...,...,...,...,...,...,...,...,...,...,...
292,C,Ed Davis (17029202),Ed Davis,17029202,C/UTIL,3000,BKN@MIN 04/12/2021 08:00PM ET,MIN,12.09,BKN,MIN
293,PF/C,Jarred Vanderbilt (17029204),Jarred Vanderbilt,17029204,PF/C/F/UTIL,3000,BKN@MIN 04/12/2021 08:00PM ET,MIN,16.41,BKN,MIN
294,SG,Spencer Dinwiddie (17029324),Spencer Dinwiddie,17029324,SG/G/UTIL,3000,BKN@MIN 04/12/2021 08:00PM ET,BKN,18.08,BKN,MIN
295,PG,Chris Chiozza (17029327),Chris Chiozza,17029327,PG/G/UTIL,3000,BKN@MIN 04/12/2021 08:00PM ET,BKN,10.74,BKN,MIN


In [20]:
# Use boolean masks to determine the Opponent

mask = DK_Data['Away'] == DK_Data['TeamAbbrev']
mask2 = DK_Data['Home'] == DK_Data['TeamAbbrev']
DK_Data['OPP'] = mask*DK_Data['Home'] + mask2*DK_Data['Away']
DK_Data

Unnamed: 0,Position,Name + ID,Name,ID,Roster Position,Salary,Game Info,TeamAbbrev,AvgPointsPerGame,Away,Home,OPP
0,PG,Russell Westbrook (17028427),Russell Westbrook,17028427,PG/G/UTIL,11000,WAS@UTA 04/12/2021 09:00PM ET,WAS,55.26,WAS,UTA,UTA
1,PG/SF,Luka Doncic (17028430),Luka Doncic,17028430,PG/SF/F/G/UTIL,10700,PHI@DAL 04/12/2021 07:30PM ET,DAL,55.18,PHI,DAL,PHI
2,PG,James Harden (17028435),James Harden,17028435,PG/G/UTIL,10600,BKN@MIN 04/12/2021 08:00PM ET,BKN,56.62,BKN,MIN,MIN
3,C,Karl-Anthony Towns (17028438),Karl-Anthony Towns,17028438,C/UTIL,10500,BKN@MIN 04/12/2021 08:00PM ET,MIN,50.92,BKN,MIN,BKN
4,C,Nikola Jokic (17028440),Nikola Jokic,17028440,C/UTIL,10400,DEN@GS 04/12/2021 10:00PM ET,DEN,58.43,DEN,GS,GS
...,...,...,...,...,...,...,...,...,...,...,...,...
292,C,Ed Davis (17029202),Ed Davis,17029202,C/UTIL,3000,BKN@MIN 04/12/2021 08:00PM ET,MIN,12.09,BKN,MIN,BKN
293,PF/C,Jarred Vanderbilt (17029204),Jarred Vanderbilt,17029204,PF/C/F/UTIL,3000,BKN@MIN 04/12/2021 08:00PM ET,MIN,16.41,BKN,MIN,BKN
294,SG,Spencer Dinwiddie (17029324),Spencer Dinwiddie,17029324,SG/G/UTIL,3000,BKN@MIN 04/12/2021 08:00PM ET,BKN,18.08,BKN,MIN,MIN
295,PG,Chris Chiozza (17029327),Chris Chiozza,17029327,PG/G/UTIL,3000,BKN@MIN 04/12/2021 08:00PM ET,BKN,10.74,BKN,MIN,MIN


In [21]:
# Pull in the data on Defensive Ratings of each team over the past three seasons

D_R_Data = pd.read_csv('Defensive Ratings.csv')
D_R_Data['OPP'] = D_R_Data['Fantasy Data Abbr']
D_R_Data

Unnamed: 0,Full Name,Abr,Fantasy Data Abbr,2020-2021,2019-2020,2018-2019,OPP
0,Atlanta Hawks,Atl,ATL,112.3,114.4,113.1,ATL
1,Boston Celtics,Bos,BOS,111.4,106.5,107.0,BOS
2,Brooklyn Nets,Bkn,BKN,113.3,109.2,109.0,BKN
3,Charlotte Hornets,Cha,CHA,111.3,112.8,112.0,CHA
4,Chicago Bulls,Chi,CHI,112.3,108.9,112.8,CHI
5,Cleveland Cavaliers,Cle,CLE,112.1,114.8,116.8,CLE
6,Dallas Mavericks,Dal,DAL,111.7,111.2,110.1,DAL
7,Denver Nuggets,Den,DEN,111.7,110.4,108.1,DEN
8,Detroit Pistons,Det,DET,112.4,112.3,108.7,DET
9,Golden State Warriors,GSW,GS,110.4,113.0,108.6,GS


In [22]:
# Join the Defensive Rating data with the data downloaded from DraftKings based on the OPP column

DK_Data = DK_Data.merge(D_R_Data, left_on='OPP', right_on='OPP')

DK_Data

Unnamed: 0,Position,Name + ID,Name,ID,Roster Position,Salary,Game Info,TeamAbbrev,AvgPointsPerGame,Away,Home,OPP,Full Name,Abr,Fantasy Data Abbr,2020-2021,2019-2020,2018-2019
0,PG,Russell Westbrook (17028427),Russell Westbrook,17028427,PG/G/UTIL,11000,WAS@UTA 04/12/2021 09:00PM ET,WAS,55.26,WAS,UTA,UTA,Utah Jazz,Uta,UTA,107.6,109.3,105.3
1,SG,Bradley Beal (17028476),Bradley Beal,17028476,SG/G/UTIL,8900,WAS@UTA 04/12/2021 09:00PM ET,WAS,46.85,WAS,UTA,UTA,Utah Jazz,Uta,UTA,107.6,109.3,105.3
2,PF,Rui Hachimura (17028596),Rui Hachimura,17028596,PF/F/UTIL,6300,WAS@UTA 04/12/2021 09:00PM ET,WAS,25.29,WAS,UTA,UTA,Utah Jazz,Uta,UTA,107.6,109.3,105.3
3,SF/PF,Deni Avdija (17028719),Deni Avdija,17028719,SF/PF/F/UTIL,5000,WAS@UTA 04/12/2021 09:00PM ET,WAS,16.47,WAS,UTA,UTA,Utah Jazz,Uta,UTA,107.6,109.3,105.3
4,C,Thomas Bryant (17028760),Thomas Bryant,17028760,C/UTIL,4600,WAS@UTA 04/12/2021 09:00PM ET,WAS,26.62,WAS,UTA,UTA,Utah Jazz,Uta,UTA,107.6,109.3,105.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
192,SG/SF,Gary Harris (17028917),Gary Harris,17028917,SG/SF/F/G/UTIL,3900,SA@ORL 04/12/2021 08:00PM ET,ORL,17.43,SA,ORL,SA,San Antonio Spurs,SAS,SA,111.0,112.6,110.6
193,PG,Devin Cannady (17028979),Devin Cannady,17028979,PG/G/UTIL,3500,SA@ORL 04/12/2021 08:00PM ET,ORL,4.00,SA,ORL,SA,San Antonio Spurs,SAS,SA,111.0,112.6,110.6
194,PG,Karim Mane (17029065),Karim Mane,17029065,PG/G/UTIL,3200,SA@ORL 04/12/2021 08:00PM ET,ORL,3.94,SA,ORL,SA,San Antonio Spurs,SAS,SA,111.0,112.6,110.6
195,PF,Jonathan Isaac (17029196),Jonathan Isaac,17029196,PF/F/UTIL,3000,SA@ORL 04/12/2021 08:00PM ET,ORL,0.00,SA,ORL,SA,San Antonio Spurs,SAS,SA,111.0,112.6,110.6


## Use Top 50 Model to predict performance of Top 50 Fantasy Players

In [23]:
total_player_dictionary = pd.read_csv('player_data_csv.csv')
total_player_dictionary

Unnamed: 0,Name,Opp,Season,Team,GMS,ST,PTS,REB,AST,BLK,...,10FT%,103P%,10FTM,10_2PM,10_3PM,10TO,10MIN,10DD2,10TD3,10FPTS
0,Russell Westbrook,Uta,2021,WAS,46,1.0,21.717391,10.695652,10.847826,0.391304,...,72.1,30.6,3.8,7.1,1.5,5.1,,10,9,56.1
1,Stephen Curry,Den,2021,GS,46,1.0,30.413043,5.543478,5.913043,0.086957,...,91.5,45.6,7.2,5.6,5.4,3.5,,1,0,47.9
2,Zion Williamson,Sac,2021,NO,49,1.0,26.836735,7.142857,3.653061,0.673469,...,72.3,30.0,7.8,11.5,0.3,2.5,,2,0,48.24
3,Nikola Vucevic,Mem,2021,CHI,54,1.0,24.037037,11.388889,3.740741,0.703704,...,63.3,42.2,1.9,6.8,2.1,1.4,,5,0,41.61
4,Nikola Jokic,GSW,2021,DEN,54,1.0,26.166667,10.907407,8.796296,0.62963,...,58.3,29.7,3.4,8.3,0.9,3.4,,8,2,48.63
5,Luka Doncic,Phi,2021,DAL,48,1.0,28.5625,7.979167,8.5625,0.604167,...,63.7,33.3,3.9,6.9,3.1,4.6,,0,0,42.1
6,De'Aaron Fox,Nor,2021,SAC,53,1.0,25.037736,3.433962,7.132075,0.471698,...,77.4,29.5,6.1,8.3,2.1,2.6,,0,0,44.97
7,Bradley Beal,Uta,2021,WAS,45,0.977778,30.311111,4.733333,4.711111,0.355556,...,68.0,33.0,4.5,6.9,1.7,3.4,,0,0,31.66
8,Donovan Mitchell,Was,2021,UTA,51,1.0,26.607843,4.45098,5.333333,0.27451,...,72.6,38.8,6.3,7.0,3.3,2.0,,0,0,41.64
9,Joel Embiid,Dal,2021,PHI,36,1.0,29.555556,10.944444,3.083333,1.472222,...,87.8,38.3,9.7,7.9,1.1,2.4,,3,0,47.65


### Manage the data to get it in the right format

#### Conduct a join to add Opponent Defensive Rating, create new columns that are combinations of current columns

In [24]:
total_player_dictionary = total_player_dictionary.merge(D_R_Data, left_on='Opp', right_on='Abr')

total_player_dictionary['10_FGM'] = total_player_dictionary['10_2PM'] + total_player_dictionary['10_3PM']

total_player_dictionary['10_FGA'] = total_player_dictionary['10_FGM']/(total_player_dictionary['10FG%']/100)

total_player_dictionary['10DD2'] = total_player_dictionary['10DD2']/10

total_player_dictionary['10TD3'] = total_player_dictionary['10TD3']/10

total_player_dictionary = total_player_dictionary.set_index('Name')

total_player_dictionary

Unnamed: 0_level_0,Opp,Season,Team,GMS,ST,PTS,REB,AST,BLK,STL,...,10FPTS,Full Name,Abr,Fantasy Data Abbr,2020-2021,2019-2020,2018-2019,OPP,10_FGM,10_FGA
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Russell Westbrook,Uta,2021,WAS,46,1.0,21.717391,10.695652,10.847826,0.391304,1.217391,...,56.1,Utah Jazz,Uta,UTA,107.6,109.3,105.3,UTA,8.6,18.574514
Bradley Beal,Uta,2021,WAS,45,0.977778,30.311111,4.733333,4.711111,0.355556,1.133333,...,31.66,Utah Jazz,Uta,UTA,107.6,109.3,105.3,UTA,8.6,19.413093
Stephen Curry,Den,2021,GS,46,1.0,30.413043,5.543478,5.913043,0.086957,1.282609,...,47.9,Denver Nuggets,Den,DEN,111.7,110.4,108.1,DEN,11.0,21.235521
Andrew Wiggins,Den,2021,GS,54,1.0,18.037037,4.777778,2.314815,0.981481,0.962963,...,33.8,Denver Nuggets,Den,DEN,111.7,110.4,108.1,DEN,7.6,15.478615
Draymond Green,Den,2021,GS,46,1.0,6.565217,6.456522,8.391304,0.73913,1.608696,...,30.0,Denver Nuggets,Den,DEN,111.7,110.4,108.1,DEN,3.1,7.692308
Zion Williamson,Sac,2021,NO,49,1.0,26.836735,7.142857,3.653061,0.673469,0.897959,...,48.24,Sacramento Kings,Sac,SAC,117.8,111.4,110.8,SAC,11.8,18.84984
Brandon Ingram,Sac,2021,NO,49,1.0,23.979592,4.979592,4.795918,0.693878,0.673469,...,37.2,Sacramento Kings,Sac,SAC,117.8,111.4,110.8,SAC,8.6,19.239374
James Johnson,Sac,2021,NO,37,0.162162,6.675676,3.162162,2.0,0.918919,0.891892,...,19.93,Sacramento Kings,Sac,SAC,117.8,111.4,110.8,SAC,3.2,7.223476
Nikola Vucevic,Mem,2021,CHI,54,1.0,24.037037,11.388889,3.740741,0.703704,0.981481,...,41.61,Memphis Grizzlies,Mem,MEM,110.0,109.7,108.0,MEM,8.9,17.519685
Zach LaVine,Mem,2021,CHI,52,1.0,27.461538,5.096154,5.038462,0.480769,0.807692,...,37.42,Memphis Grizzlies,Mem,MEM,110.0,109.7,108.0,MEM,8.6,20.427553


### Collect the columns of data needed by the linear regression model

In [25]:
Top_50_X = total_player_dictionary.loc[:,['10FPTS', '10PTS', '10AST', '10STL', '10BLK', '10DD2', '10TD3', 'MIN', '10_FGA', '2020-2021']]

Top_50_X

Unnamed: 0_level_0,10FPTS,10PTS,10AST,10STL,10BLK,10DD2,10TD3,MIN,10_FGA,2020-2021
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Russell Westbrook,56.1,22.5,13.0,0.9,0.3,1.0,0.9,35.326087,18.574514,107.6
Bradley Beal,31.66,23.4,5.0,0.3,0.1,0.0,0.0,34.466667,19.413093,107.6
Stephen Curry,47.9,34.6,4.8,1.1,0.1,0.1,0.0,33.956522,21.235521,111.7
Andrew Wiggins,33.8,19.6,3.2,1.1,1.2,0.1,0.0,32.62963,15.478615,111.7
Draymond Green,30.0,8.2,7.6,2.2,0.7,0.2,0.0,30.586957,7.692308,111.7
Zion Williamson,48.24,31.7,4.4,1.0,0.6,0.2,0.0,32.877551,18.84984,117.8
Brandon Ingram,37.2,25.0,5.0,0.8,0.4,0.0,0.0,34.408163,19.239374,117.8
James Johnson,19.93,8.8,2.7,1.0,1.2,0.0,0.0,19.621622,7.223476,117.8
Nikola Vucevic,41.61,21.8,3.5,1.1,1.0,0.5,0.0,33.666667,17.519685,110.0
Zach LaVine,37.42,24.2,5.6,0.3,0.6,0.1,0.0,35.134615,20.427553,110.0


### Make Predictions using the model

In [26]:
Top_50_predictions = Top_50_Model.predict(Top_50_X)
Top_50_X['FPTS_Pred'] = Top_50_predictions
Top_50_X

Unnamed: 0_level_0,10FPTS,10PTS,10AST,10STL,10BLK,10DD2,10TD3,MIN,10_FGA,2020-2021,FPTS_Pred
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Russell Westbrook,56.1,22.5,13.0,0.9,0.3,1.0,0.9,35.326087,18.574514,107.6,59.792184
Bradley Beal,31.66,23.4,5.0,0.3,0.1,0.0,0.0,34.466667,19.413093,107.6,32.794315
Stephen Curry,47.9,34.6,4.8,1.1,0.1,0.1,0.0,33.956522,21.235521,111.7,44.846998
Andrew Wiggins,33.8,19.6,3.2,1.1,1.2,0.1,0.0,32.62963,15.478615,111.7,29.124486
Draymond Green,30.0,8.2,7.6,2.2,0.7,0.2,0.0,30.586957,7.692308,111.7,29.232563
Zion Williamson,48.24,31.7,4.4,1.0,0.6,0.2,0.0,32.877551,18.84984,117.8,44.200942
Brandon Ingram,37.2,25.0,5.0,0.8,0.4,0.0,0.0,34.408163,19.239374,117.8,37.241887
James Johnson,19.93,8.8,2.7,1.0,1.2,0.0,0.0,19.621622,7.223476,117.8,17.225467
Nikola Vucevic,41.61,21.8,3.5,1.1,1.0,0.5,0.0,33.666667,17.519685,110.0,39.858217
Zach LaVine,37.42,24.2,5.6,0.3,0.6,0.1,0.0,35.134615,20.427553,110.0,36.495193


## Remove the Top 50 Players so we are left with the remaining players for the simpler model

In [27]:
# Create List of the Top 50 players from the total_player_dictionary dataframe

total_player_dictionary = pd.read_csv('player_data_csv.csv')

Top_50_list = list(total_player_dictionary['Name'])

mask_50 = (DK_Data['Name'].isin(Top_50_list))
mask_not_50 = (~DK_Data['Name'].isin(Top_50_list))

DK_Data['Top_50'] = (DK_Data['Name'].isin(Top_50_list))

Rest_of_Players_list = list(mask_not_50*DK_Data['Name'])

Rest_of_Players_list

Rest_of_Players_list = [i for i in Rest_of_Players_list if i]

Rest_of_Players_list

['Rui Hachimura',
 'Deni Avdija',
 'Thomas Bryant',
 'Davis Bertans',
 'Raul Neto',
 'Alex Len',
 'Ish Smith',
 'Robin Lopez',
 'Daniel Gafford',
 'Isaac Bonga',
 'Anthony Gill',
 'Chandler Hutchison',
 'Garrison Mathews',
 'Cassius Winston',
 'Kristaps Porzingis',
 'Dorian Finney-Smith',
 'Tim Hardaway Jr.',
 'Josh Richardson',
 'Jalen Brunson',
 'Maxi Kleber',
 'Boban Marjanovic',
 'Nicolo Melli',
 'Dwight Powell',
 'JJ Redick',
 'Trey Burke',
 'Tyler Bey',
 'Josh Green',
 'Nate Hinton',
 'Willie Cauley-Stein',
 'Tyrell Terry',
 'James Harden',
 'Kyrie Irving',
 'Kevin Durant',
 'Joe Harris',
 'LaMarcus Aldridge',
 'Bruce Brown',
 'Jeff Green',
 'Blake Griffin',
 'DeAndre Jordan',
 'Landry Shamet',
 'Alize Johnson',
 'Timothe Luwawu-Cabarrot',
 'Nicolas Claxton',
 'Tyler Johnson',
 'Spencer Dinwiddie',
 'Chris Chiozza',
 'Reggie Perry',
 'Karl-Anthony Towns',
 'Anthony Edwards',
 "D'Angelo Russell",
 'Malik Beasley',
 'Ricky Rubio',
 'Jaden McDaniels',
 'Jaylen Nowell',
 'Jordan McLa

In [28]:
# Use that list to only keep the players not in the Top 50 performers

DK_Data = DK_Data.set_index('Name')

Rest_of_Players = DK_Data.loc[Rest_of_Players_list]

Rest_of_Players

Unnamed: 0_level_0,Position,Name + ID,ID,Roster Position,Salary,Game Info,TeamAbbrev,AvgPointsPerGame,Away,Home,OPP,Full Name,Abr,Fantasy Data Abbr,2020-2021,2019-2020,2018-2019,Top_50
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Rui Hachimura,PF,Rui Hachimura (17028596),17028596,PF/F/UTIL,6300,WAS@UTA 04/12/2021 09:00PM ET,WAS,25.29,WAS,UTA,UTA,Utah Jazz,Uta,UTA,107.6,109.3,105.3,False
Deni Avdija,SF/PF,Deni Avdija (17028719),17028719,SF/PF/F/UTIL,5000,WAS@UTA 04/12/2021 09:00PM ET,WAS,16.47,WAS,UTA,UTA,Utah Jazz,Uta,UTA,107.6,109.3,105.3,False
Thomas Bryant,C,Thomas Bryant (17028760),17028760,C/UTIL,4600,WAS@UTA 04/12/2021 09:00PM ET,WAS,26.62,WAS,UTA,UTA,Utah Jazz,Uta,UTA,107.6,109.3,105.3,False
Davis Bertans,SF/PF,Davis Bertans (17028863),17028863,SF/PF/F/UTIL,4100,WAS@UTA 04/12/2021 09:00PM ET,WAS,18.88,WAS,UTA,UTA,Utah Jazz,Uta,UTA,107.6,109.3,105.3,False
Raul Neto,PG/SG,Raul Neto (17028884),17028884,PG/SG/G/UTIL,4000,WAS@UTA 04/12/2021 09:00PM ET,WAS,16.74,WAS,UTA,UTA,Utah Jazz,Uta,UTA,107.6,109.3,105.3,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Gary Harris,SG/SF,Gary Harris (17028917),17028917,SG/SF/F/G/UTIL,3900,SA@ORL 04/12/2021 08:00PM ET,ORL,17.43,SA,ORL,SA,San Antonio Spurs,SAS,SA,111.0,112.6,110.6,False
Devin Cannady,PG,Devin Cannady (17028979),17028979,PG/G/UTIL,3500,SA@ORL 04/12/2021 08:00PM ET,ORL,4.00,SA,ORL,SA,San Antonio Spurs,SAS,SA,111.0,112.6,110.6,False
Karim Mane,PG,Karim Mane (17029065),17029065,PG/G/UTIL,3200,SA@ORL 04/12/2021 08:00PM ET,ORL,3.94,SA,ORL,SA,San Antonio Spurs,SAS,SA,111.0,112.6,110.6,False
Jonathan Isaac,PF,Jonathan Isaac (17029196),17029196,PF/F/UTIL,3000,SA@ORL 04/12/2021 08:00PM ET,ORL,0.00,SA,ORL,SA,San Antonio Spurs,SAS,SA,111.0,112.6,110.6,False


## Predict FPTS for Rest of Players Cohort

In [29]:
R_o_P_X = Rest_of_Players.loc[:,['AvgPointsPerGame', '2020-2021']]

R_o_P_X

Unnamed: 0_level_0,AvgPointsPerGame,2020-2021
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Rui Hachimura,25.29,107.6
Deni Avdija,16.47,107.6
Thomas Bryant,26.62,107.6
Davis Bertans,18.88,107.6
Raul Neto,16.74,107.6
...,...,...
Gary Harris,17.43,111.0
Devin Cannady,4.00,111.0
Karim Mane,3.94,111.0
Jonathan Isaac,0.00,111.0


In [30]:
Rest_of_Players_predictions = Rest_of_Players_Model.predict(R_o_P_X)
R_o_P_X['FPTS_Pred'] = Rest_of_Players_predictions
R_o_P_X

Unnamed: 0_level_0,AvgPointsPerGame,2020-2021,FPTS_Pred
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rui Hachimura,25.29,107.6,25.470518
Deni Avdija,16.47,107.6,17.455069
Thomas Bryant,26.62,107.6,26.679197
Davis Bertans,18.88,107.6,19.645231
Raul Neto,16.74,107.6,17.700440
...,...,...,...
Gary Harris,17.43,111.0,19.123648
Devin Cannady,4.00,111.0,6.918718
Karim Mane,3.94,111.0,6.864191
Jonathan Isaac,0.00,111.0,3.283593
