# Scrape betting odds and statistics for MVP candidates in current season
- odds are taken from https://sportsbook.draftkings.com/leagues/2/42648?category=awards&subcategory=regular-season-mvp&wpsrc=Organic%20Search&wpaffn=Google&wpkw=https%3A%2F%2Fsportsbook.draftkings.com%2Fleagues%2F2%2F42648%3Fcategory%3Dawards%26subcategory%3Dregular-season-mvp&wpcn=leagues&wpscn=2%2F42648

In [12]:
import numpy as np
import requests
from bs4 import BeautifulSoup
import pandas as pd
from basketball_reference_web_scraper import client
from unidecode import unidecode
import os
import joblib
import tensorflow as tf
from tensorflow import keras
from keras.models import load_model
import pickle
import warnings
warnings.filterwarnings('ignore')

## Scrape current odds from DraftKings

In [2]:
# link with odds
url = "https://sportsbook.draftkings.com/event/nba-awards-2023-24/6fe78ab7-324d-4d1a-7f10-08db724c2a58"

In [13]:
# Send a GET request to the URL
response = requests.get(url)

# Parse the HTML content
soup = BeautifulSoup(response.text, 'html.parser')
    
# Find the section containing "Regular Season MVP"
mvp_section = soup.find('ul', {'class': 'game-props-card17'})  

In [143]:
# Extract data from the section
data = []
player_list = mvp_section.find_all('li', {'class': 'game-props-card17__cell'})
    
for player in player_list:
    player_name = player.find('span', {'class': 'sportsbook-outcome-cell__label'})
    odds = player.find('span', {'class': 'sportsbook-odds'})
            
    if player_name and odds:
        data.append({
            'name': player_name.text.strip(),
            'odds': odds.text.strip()
        })

# Create a pandas dataframe, take only top 10 candidates
odds = pd.DataFrame(data, columns=['name', 'odds']).head(10)
            
odds

Unnamed: 0,name,odds
0,Joel Embiid,120
1,Nikola Jokic,250
2,Shai Gilgeous-Alexander,400
3,Luka Doncic,900
4,Giannis Antetokounmpo,900
5,Jayson Tatum,3500
6,Kevin Durant,6000
7,Anthony Edwards,8000
8,Devin Booker,8000
9,De'Aaron Fox,10000


#### Function to scrape odds

In [3]:
def scrape_odds(url):
    response = requests.get(url)

    soup = BeautifulSoup(response.text, 'html.parser')
    
    # Find the section containing "Regular Season MVP"
    mvp_section = soup.find('ul', {'class': 'game-props-card17'}) #adjust class accordingly as site changes

    data = []
    player_list = mvp_section.find_all('li', {'class': 'game-props-card17__cell'})
    for player in player_list:
        player_name = player.find('span', {'class': 'sportsbook-outcome-cell__label'})
        odds = player.find('span', {'class': 'sportsbook-odds'})
                
        if player_name and odds:
            data.append({
                'name': player_name.text.strip(),
                'odds': odds.text.strip()
            })
    
    odds = pd.DataFrame(data, columns=['name', 'odds']).head(10)
    cands = odds['name']
    return odds, cands

## Get statistics for candidates

### Basic Stats

In [130]:
cands = odds['name']

In [11]:
stats1 = pd.DataFrame(client.players_season_totals(season_end_year=2024))
stats1['name'] = stats1['name'].apply(lambda x: unidecode(x))
stats2 = stats1[stats1['name'].isin(cands)]
stats2['PTS'] = stats2['points'] / stats2['games_played']
stats2['REB'] = (stats2['offensive_rebounds'] + stats2['defensive_rebounds']) / stats2['games_played']
stats2['AST'] = stats2['assists'] / stats2['games_played']
stats2['PRA'] = round(stats2['PTS'] + stats2['REB'] + stats2['AST'], 3)
stats2

Unnamed: 0,slug,name,positions,age,team,games_played,games_started,minutes_played,made_field_goals,attempted_field_goals,...,assists,steals,blocks,turnovers,personal_fouls,points,PTS,REB,AST,PRA
10,antetgi01,Giannis Antetokounmpo,[Position.POWER_FORWARD],29,Team.MILWAUKEE_BUCKS,42,42,1479,489,808,...,261,56,48,151,122,1313,31.261905,11.666667,6.214286,49.143
55,bookede01,Devin Booker,[Position.POINT_GUARD],27,Team.PHOENIX_SUNS,35,35,1262,331,670,...,263,31,11,96,105,953,27.228571,4.885714,7.514286,39.629
126,doncilu01,Luka Doncic,[Position.POINT_GUARD],24,Team.DALLAS_MAVERICKS,37,37,1366,423,878,...,343,52,23,146,66,1242,33.567568,8.513514,9.27027,51.351
132,duranke01,Kevin Durant,[Position.POWER_FORWARD],35,Team.PHOENIX_SUNS,37,37,1373,382,722,...,210,32,46,115,71,1076,29.081081,6.405405,5.675676,41.162
135,edwaran01,Anthony Edwards,[Position.SHOOTING_GUARD],22,Team.MINNESOTA_TIMBERWOLVES,41,41,1427,363,787,...,212,50,22,140,78,1057,25.780488,5.292683,5.170732,36.244
138,embiijo01,Joel Embiid,[Position.CENTER],29,Team.PHILADELPHIA_76ERS,32,32,1096,386,716,...,188,37,60,116,92,1156,36.125,11.5625,5.875,53.562
149,foxde01,De'Aaron Fox,[Position.POINT_GUARD],26,Team.SACRAMENTO_KINGS,36,36,1281,351,755,...,203,57,13,91,90,988,27.444444,4.138889,5.638889,37.222
165,gilgesh01,Shai Gilgeous-Alexander,[Position.POINT_GUARD],25,Team.OKLAHOMA_CITY_THUNDER,43,43,1480,471,862,...,277,98,33,89,111,1339,31.139535,5.627907,6.44186,43.209
248,jokicni01,Nikola Jokic,[Position.CENTER],28,Team.DENVER_NUGGETS,44,44,1480,450,763,...,401,50,41,125,116,1153,26.204545,11.954545,9.113636,47.273
475,tatumja01,Jayson Tatum,[Position.POWER_FORWARD],25,Team.BOSTON_CELTICS,41,41,1474,372,794,...,181,42,21,104,86,1107,27.0,8.390244,4.414634,39.805


In [132]:
df1_cols = ['name', 'PRA']
df1 = stats2[df1_cols]

#### Function to scrape basic stats

In [4]:
def scrape_basic(year):
    stats1 = pd.DataFrame(client.players_season_totals(season_end_year=year))
    stats1['name'] = stats1['name'].apply(lambda x: unidecode(x))
    stats2 = stats1[stats1['name'].isin(cands)]
    stats2['PTS'] = stats2['points'] / stats2['games_played']
    stats2['REB'] = (stats2['offensive_rebounds'] + stats2['defensive_rebounds']) / stats2['games_played']
    stats2['AST'] = stats2['assists'] / stats2['games_played']
    stats2['PRA'] = round(stats2['PTS'] + stats2['REB'] + stats2['AST'], 3)
    basic_cols = ['name', 'PRA']
    df1 = stats2[basic_cols]
    return df1

### Advanced Stats

In [133]:
adv1 = pd.DataFrame(client.players_advanced_season_totals(season_end_year=2024))
adv1['name'] = adv1['name'].apply(lambda x: unidecode(x))
adv2 = adv1[adv1['name'].isin(cands)]
adv2['WS/48'] = adv2['win_shares_per_48_minutes']
teams = []

for i in adv2['team']:
    tm = i.value
    teams.append(tm)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adv2['WS/48'] = adv2['win_shares_per_48_minutes']


In [134]:
adv2['team'] = teams
adv2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adv2['team'] = teams


Unnamed: 0,slug,name,positions,age,team,games_played,minutes_played,player_efficiency_rating,true_shooting_percentage,three_point_attempt_rate,...,offensive_win_shares,defensive_win_shares,win_shares,win_shares_per_48_minutes,offensive_box_plus_minus,defensive_box_plus_minus,box_plus_minus,value_over_replacement_player,is_combined_totals,WS/48
10,antetgi01,Giannis Antetokounmpo,[Position.POWER_FORWARD],29,MILWAUKEE BUCKS,42,1479,29.7,0.646,0.093,...,5.2,2.0,7.3,0.236,6.3,1.8,8.2,3.8,False,0.236
55,bookede01,Devin Booker,[Position.POINT_GUARD],27,PHOENIX SUNS,35,1262,22.4,0.613,0.301,...,3.8,1.0,4.8,0.182,4.1,-1.3,2.8,1.5,False,0.182
126,doncilu01,Luka Doncic,[Position.POINT_GUARD],24,DALLAS MAVERICKS,37,1366,27.5,0.607,0.436,...,3.9,1.6,5.5,0.194,8.3,1.2,9.5,4.0,False,0.194
132,duranke01,Kevin Durant,[Position.POWER_FORWARD],35,PHOENIX SUNS,37,1373,23.8,0.643,0.259,...,3.7,1.5,5.2,0.18,6.0,0.1,6.1,2.8,False,0.18
135,edwaran01,Anthony Edwards,[Position.SHOOTING_GUARD],22,MINNESOTA TIMBERWOLVES,41,1427,19.2,0.582,0.334,...,1.2,2.4,3.6,0.122,2.5,0.5,3.0,1.8,False,0.122
138,embiijo01,Joel Embiid,[Position.CENTER],29,PHILADELPHIA 76ERS,32,1096,35.5,0.651,0.142,...,5.1,2.2,7.3,0.319,9.6,3.5,13.1,4.2,False,0.319
149,foxde01,De'Aaron Fox,[Position.POINT_GUARD],26,SACRAMENTO KINGS,36,1281,20.1,0.574,0.387,...,1.8,1.2,3.1,0.115,3.0,-0.8,2.2,1.4,False,0.115
165,gilgesh01,Shai Gilgeous-Alexander,[Position.POINT_GUARD],25,OKLAHOMA CITY THUNDER,43,1480,30.7,0.647,0.169,...,6.7,2.5,9.3,0.3,7.3,2.7,10.0,4.5,False,0.3
248,jokicni01,Nikola Jokic,[Position.CENTER],28,DENVER NUGGETS,44,1480,32.1,0.66,0.166,...,7.2,2.5,9.7,0.315,9.9,4.4,14.4,6.2,False,0.315
475,tatumja01,Jayson Tatum,[Position.POWER_FORWARD],25,BOSTON CELTICS,41,1474,21.6,0.599,0.429,...,3.0,2.4,5.4,0.177,3.9,0.9,4.8,2.5,False,0.177


#### function to scrape Advanced Stats

In [5]:
def scrape_adv(year):
    adv1 = pd.DataFrame(client.players_advanced_season_totals(season_end_year=year))
    adv1['name'] = adv1['name'].apply(lambda x: unidecode(x))
    adv2 = adv1[adv1['name'].isin(cands)]
    adv2['WS/48'] = adv2['win_shares_per_48_minutes']
    teams = []

    for i in adv2['team']:
        tm = i.value
        teams.append(tm)
    adv2['team'] = teams

    return adv2

## Scrape live standings, merge with advanced statistics

In [135]:
# load standings from basketball webscraper
standings = pd.DataFrame(client.standings(season_end_year=2024))
standings['wl_pct'] = standings['wins']/(standings['wins']+standings['losses'])
standings['conference'] = pd.Categorical(standings['conference'])
standings['seed'] = float('nan')
standings['seed'] = standings.groupby('conference')['wl_pct'].rank(ascending=False, method='min')
standings['seed'] = standings['seed'].astype(int)
standings['team'] = standings['team'].apply(lambda x: x.value)
standings

Unnamed: 0,team,wins,losses,division,conference,wl_pct,seed
0,BOSTON CELTICS,34,10,Division.ATLANTIC,Conference.EASTERN,0.772727,1
1,PHILADELPHIA 76ERS,29,13,Division.ATLANTIC,Conference.EASTERN,0.690476,3
2,NEW YORK KNICKS,27,17,Division.ATLANTIC,Conference.EASTERN,0.613636,5
3,BROOKLYN NETS,17,26,Division.ATLANTIC,Conference.EASTERN,0.395349,11
4,TORONTO RAPTORS,16,28,Division.ATLANTIC,Conference.EASTERN,0.363636,12
5,MILWAUKEE BUCKS,31,13,Division.CENTRAL,Conference.EASTERN,0.704545,2
6,CLEVELAND CAVALIERS,26,16,Division.CENTRAL,Conference.EASTERN,0.619048,4
7,INDIANA PACERS,24,20,Division.CENTRAL,Conference.EASTERN,0.545455,6
8,CHICAGO BULLS,21,24,Division.CENTRAL,Conference.EASTERN,0.466667,9
9,DETROIT PISTONS,5,39,Division.CENTRAL,Conference.EASTERN,0.113636,15


#### Function to scrape standings

In [6]:
def scrape_standings(year):
    standings = pd.DataFrame(client.standings(season_end_year=year))
    standings['wl_pct'] = standings['wins']/(standings['wins']+standings['losses'])
    standings['conference'] = pd.Categorical(standings['conference'])
    standings['seed'] = float('nan')
    standings['seed'] = standings.groupby('conference')['wl_pct'].rank(ascending=False, method='min')
    standings['seed'] = standings['seed'].astype(int)
    standings['team'] = standings['team'].apply(lambda x: x.value)
    return standings

In [136]:
# combine seed and wl_pct with adv stats of cands
adv2_cols = ['name', 'team', 'WS/48', 'player_efficiency_rating', 'offensive_box_plus_minus',
       'value_over_replacement_player']
stand_cols = ['team', 'wl_pct', 'seed']
df2 = pd.merge(adv2[adv2_cols], standings[stand_cols], on='team', how='inner')
df2

Unnamed: 0,name,team,WS/48,player_efficiency_rating,offensive_box_plus_minus,value_over_replacement_player,wl_pct,seed
0,Giannis Antetokounmpo,MILWAUKEE BUCKS,0.236,29.7,6.3,3.8,0.704545,2
1,Devin Booker,PHOENIX SUNS,0.182,22.4,4.1,1.5,0.590909,5
2,Kevin Durant,PHOENIX SUNS,0.18,23.8,6.0,2.8,0.590909,5
3,Luka Doncic,DALLAS MAVERICKS,0.194,27.5,8.3,4.0,0.545455,8
4,Anthony Edwards,MINNESOTA TIMBERWOLVES,0.122,19.2,2.5,1.8,0.704545,1
5,Joel Embiid,PHILADELPHIA 76ERS,0.319,35.5,9.6,4.2,0.690476,3
6,De'Aaron Fox,SACRAMENTO KINGS,0.115,20.1,3.0,1.4,0.571429,7
7,Shai Gilgeous-Alexander,OKLAHOMA CITY THUNDER,0.3,30.7,7.3,4.5,0.704545,1
8,Nikola Jokic,DENVER NUGGETS,0.315,32.1,9.9,6.2,0.688889,3
9,Jayson Tatum,BOSTON CELTICS,0.177,21.6,3.9,2.5,0.772727,1


In [137]:
df = pd.merge(df1, df2, on='name', how='inner')
df

Unnamed: 0,name,PRA,team,WS/48,player_efficiency_rating,offensive_box_plus_minus,value_over_replacement_player,wl_pct,seed
0,Giannis Antetokounmpo,49.143,MILWAUKEE BUCKS,0.236,29.7,6.3,3.8,0.704545,2
1,Devin Booker,39.629,PHOENIX SUNS,0.182,22.4,4.1,1.5,0.590909,5
2,Luka Doncic,51.351,DALLAS MAVERICKS,0.194,27.5,8.3,4.0,0.545455,8
3,Kevin Durant,41.162,PHOENIX SUNS,0.18,23.8,6.0,2.8,0.590909,5
4,Anthony Edwards,36.244,MINNESOTA TIMBERWOLVES,0.122,19.2,2.5,1.8,0.704545,1
5,Joel Embiid,53.562,PHILADELPHIA 76ERS,0.319,35.5,9.6,4.2,0.690476,3
6,De'Aaron Fox,37.222,SACRAMENTO KINGS,0.115,20.1,3.0,1.4,0.571429,7
7,Shai Gilgeous-Alexander,43.209,OKLAHOMA CITY THUNDER,0.3,30.7,7.3,4.5,0.704545,1
8,Nikola Jokic,47.273,DENVER NUGGETS,0.315,32.1,9.9,6.2,0.688889,3
9,Jayson Tatum,39.805,BOSTON CELTICS,0.177,21.6,3.9,2.5,0.772727,1


#### Function to merge final dataframes

In [7]:
def merge_dfs(basic, adv, standings):
    # combine seed and wl_pct with adv stats of cands
    adv_cols = ['name', 'team', 'WS/48', 'player_efficiency_rating', 'offensive_box_plus_minus',
        'value_over_replacement_player']
    stand_cols = ['team', 'wl_pct', 'seed']
    df2 = pd.merge(adv[adv_cols], standings[stand_cols], on='team', how='inner')
    df = pd.merge(basic, df2, on='name', how='inner')
    return df
    

In [10]:
# link with odds
url = "https://sportsbook.draftkings.com/event/nba-awards-2023-24/6fe78ab7-324d-4d1a-7f10-08db724c2a58"
odds, cands = scrape_odds(url)
basic_stats = scrape_basic(2024)
adv_stats = scrape_adv(2024)
standings = scrape_standings(2024)
df = merge_dfs(basic_stats, adv_stats, standings)
df

Unnamed: 0,name,PRA,team,WS/48,player_efficiency_rating,offensive_box_plus_minus,value_over_replacement_player,wl_pct,seed
0,Giannis Antetokounmpo,49.143,MILWAUKEE BUCKS,0.236,29.7,6.3,3.8,0.704545,2
1,Devin Booker,39.629,PHOENIX SUNS,0.182,22.4,4.1,1.5,0.590909,5
2,Luka Doncic,51.351,DALLAS MAVERICKS,0.194,27.5,8.3,4.0,0.545455,8
3,Kevin Durant,41.162,PHOENIX SUNS,0.18,23.8,6.0,2.8,0.590909,5
4,Anthony Edwards,36.244,MINNESOTA TIMBERWOLVES,0.122,19.2,2.5,1.8,0.704545,1
5,Joel Embiid,53.562,PHILADELPHIA 76ERS,0.319,35.5,9.6,4.2,0.690476,3
6,De'Aaron Fox,37.222,SACRAMENTO KINGS,0.115,20.1,3.0,1.4,0.571429,7
7,Shai Gilgeous-Alexander,43.209,OKLAHOMA CITY THUNDER,0.3,30.7,7.3,4.5,0.704545,1
8,Nikola Jokic,47.273,DENVER NUGGETS,0.315,32.1,9.9,6.2,0.688889,3
9,Jayson Tatum,39.805,BOSTON CELTICS,0.177,21.6,3.9,2.5,0.772727,1


## Predict with best RF model

In [138]:
rf = joblib.load("./rf_best.joblib")
X_cols = ['PRA',
 'WS/48',
 'player_efficiency_rating',
 'offensive_box_plus_minus',
 'value_over_replacement_player',
 'wl_pct',
 'seed']

X = df[X_cols]
preds = rf.predict(X)

In [149]:
res1 = pd.DataFrame()
res1['name'] = df['name']
res1['share_pred'] = preds
res2 = res1.sort_values(by='share_pred', ascending=False)
res = pd.merge(res2, odds, on='name', how='inner')

In [150]:
res

Unnamed: 0,name,share_pred,odds
0,Nikola Jokic,0.663677,250
1,Shai Gilgeous-Alexander,0.65055,400
2,Joel Embiid,0.630212,120
3,Giannis Antetokounmpo,0.476995,900
4,Jayson Tatum,0.330158,3500
5,Luka Doncic,0.180994,900
6,Anthony Edwards,0.141886,8000
7,Kevin Durant,0.090121,6000
8,Devin Booker,0.071543,8000
9,De'Aaron Fox,0.042519,10000


## Predict with best NN model

In [146]:
load_folder = 'NN_models'
num_models = 5
loaded_models = []

for i in range(num_models):
    # Load the model from the 'NN_models' folder
    model = load_model(os.path.join(load_folder, f'model_{i + 1}.h5'))
    loaded_models.append(model)

In [147]:
nn_preds = np.mean([model.predict(X) for model in loaded_models], axis=0)



In [148]:
nn_res1 = pd.DataFrame()
nn_res1['name'] = df['name']
nn_res1['share_pred'] = nn_preds
nn_res2 = nn_res1.sort_values(by='share_pred', ascending=False)
nn_res = pd.merge(nn_res2, odds, on='name', how='inner')
nn_res

Unnamed: 0,name,share_pred,odds
0,Joel Embiid,0.664256,120
1,Nikola Jokic,0.642028,250
2,Shai Gilgeous-Alexander,0.473401,400
3,Giannis Antetokounmpo,0.430123,900
4,Luka Doncic,0.051118,900
5,Jayson Tatum,0.035692,3500
6,Kevin Durant,0.028875,6000
7,Anthony Edwards,0.025859,8000
8,Devin Booker,0.018377,8000
9,De'Aaron Fox,0.002929,10000


In [124]:
with open('nn_results_year.pkl', 'rb') as fp:
    nn_results_year = pickle.load(fp)
#nn_results_year[2023]

In [125]:
nn_results_year[2023]

Unnamed: 0_level_0,Share,prediction
Player,Unnamed: 1_level_1,Unnamed: 2_level_1
Joel Embiid,0.915,0.593989
Nikola Jokić,0.674,0.693613
Giannis Antetokounmpo,0.606,0.618509
Jayson Tatum,0.28,0.381494
Shai Gilgeous-Alexander,0.046,0.007228
Donovan Mitchell,0.03,0.043665
Domantas Sabonis,0.027,0.203329
Luka Dončić,0.01,0.007467
Stephen Curry,0.005,0.016571
Jimmy Butler,0.003,0.00819
