# Rebounds

In [185]:
import os
import pandas as pd
import numpy as np
import scipy.stats as stats
from unidecode import unidecode
from datetime import date

In [186]:
os.chdir('/home/tylerengland/NBA')
os.getcwd()

'/home/tylerengland/NBA'

### Load Data

In [187]:
df = pd.read_csv('backend/data/details/game_details.csv')

In [188]:
df.head()

Unnamed: 0,date,visitor,home,team,starter,player,mp,fg,fga,fg_perc,...,drb,trb,ast,stl,blk,tov,pf,pts,plus_minus,season
0,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,1.0,Kirk Hinrich,34:38:00,10.0,18.0,0.556,...,4.0,4.0,3.0,2.0,0.0,0.0,2.0,26.0,23.0,
1,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,1.0,Ben Wallace,28:29:00,2.0,5.0,0.4,...,5.0,11.0,1.0,0.0,1.0,0.0,2.0,5.0,13.0,
2,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,1.0,Luol Deng,24:07:00,4.0,9.0,0.444,...,1.0,2.0,1.0,1.0,0.0,3.0,2.0,12.0,8.0,
3,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,1.0,Ben Gordon,22:56,1.0,9.0,0.111,...,1.0,1.0,2.0,1.0,0.0,2.0,3.0,6.0,7.0,
4,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,1.0,P.J. Brown,18:23,1.0,2.0,0.5,...,3.0,3.0,1.0,1.0,1.0,2.0,3.0,4.0,15.0,


In [189]:
df.columns

Index(['date', 'visitor', 'home', 'team', 'starter', 'player', 'mp', 'fg',
       'fga', 'fg_perc', '3p', '3pa', '3p_perc', 'ft', 'fta', 'ft_perc', 'orb',
       'drb', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts', 'plus_minus',
       'season'],
      dtype='object')

Drop duplicates and NA's

In [190]:
df.drop_duplicates(subset=['date', 'visitor', 'home', 'team', 'player'], inplace=True)

In [191]:
df.dropna(subset = ['player'], inplace = True)

In [192]:
df.isna().sum()

date               0
visitor            0
home               0
team               0
starter            0
player             0
mp                 0
fg                 0
fga                0
fg_perc        22778
3p                 0
3pa                0
3p_perc       156879
ft                 0
fta                0
ft_perc       189116
orb                0
drb                0
trb                0
ast                0
stl                0
blk                0
tov                0
pf                 0
pts                0
plus_minus        79
season        532543
dtype: int64

Clean data

In [193]:
df['fg_perc'] = df['fg'] / df['fga']
df['3p_perc'] = df['3p'] / df['3pa']
df['ft_perc'] = df['ft'] / df['fta']
df['min'] = df['mp'].apply(lambda x: int(x.split(':')[0]))
df['sec'] = df['mp'].apply(lambda x: int(x.split(':')[-1]))
df['pt'] = df['min'] + df['sec'] / 60
df['date'] = pd.to_datetime(df['date'])
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['player'] = df['player'].apply(lambda x: unidecode(x.strip().lower()))

In [194]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 538186 entries, 0 to 538801
Data columns (total 32 columns):
 #   Column      Non-Null Count   Dtype         
---  ------      --------------   -----         
 0   date        538186 non-null  datetime64[ns]
 1   visitor     538186 non-null  object        
 2   home        538186 non-null  object        
 3   team        538186 non-null  int64         
 4   starter     538186 non-null  float64       
 5   player      538186 non-null  object        
 6   mp          538186 non-null  object        
 7   fg          538186 non-null  float64       
 8   fga         538186 non-null  float64       
 9   fg_perc     424832 non-null  float64       
 10  3p          538186 non-null  float64       
 11  3pa         538186 non-null  float64       
 12  3p_perc     290731 non-null  float64       
 13  ft          538186 non-null  float64       
 14  fta         538186 non-null  float64       
 15  ft_perc     258494 non-null  float64       
 16  or

Feature Engineer

In [195]:
x = df.groupby(['date', 'visitor', 'home', 'team'])[['fga', 'orb', 'tov', 'fta']].transform('sum')
df['possessions'] = x.fga - x.orb + x.tov + 0.4 * x.fta

In [196]:
def get_season(month, year):
    if 10 <= month <= 12:
        return year
    else:
        return year - 1

In [197]:
df['season'] = df.apply(lambda x: get_season(x.month, x.year), 1)
df['opponent'] = np.where(df['team'], df['visitor'], df['home'])

Merge Schedule

In [None]:
schedules = pd.read_csv("backend/data/schedules/2022.csv")
schedules['date'] = pd.to_datetime(schedules['date'])

home_schedule = schedules.copy()
home_schedule['team'] = 1
home_schedule['opponent'] = home_schedule['visitor']

visitor_schedule = schedules.copy()
visitor_schedule['team'] = 0
visitor_schedule['opponent'] = visitor_schedule['home']

schedules = pd.concat([home_schedule, visitor_schedule])

df = pd.merge(schedules, df, left_on = ['date', 'visitor', 'home', 'team', 'opponent'], right_on = ['date', 'visitor', 'home', 'team', 'opponent'], how = 'left')

Teams Defense

In [None]:
df['defense'] = np.where(df['team'], df['visitor'], df['home'])
defense_df = df.groupby(['date', 'defense'])[['trb']].sum().reset_index()

Feature engineer for defense

In [None]:
defense_df['date'] = pd.to_datetime(defense_df['date'])
defense_df['year'] = defense_df['date'].dt.year
defense_df['month'] = defense_df['date'].dt.month
defense_df['season'] = defense_df.apply(lambda x: get_season(x.month, x.year), 1)

Cumulative moving average for teams 

In [None]:
defense_df['rb'] = defense_df.groupby(['season', 'defense'])['trb'].shift(1)
defense_df['trb_sum'] = defense_df.groupby(['season', 'defense'])['rb'].expanding(min_periods=5).sum().sort_index(axis = 0, level = 2).values
defense_df['trb_count'] = defense_df.groupby(['season', 'defense'])['rb'].expanding(min_periods=5).count().sort_index(axis = 0, level = 2).values
defense_df['trb_mean'] = defense_df.groupby(['season', 'defense'])['rb'].expanding(min_periods=5).mean().sort_index(axis = 0, level = 2).values

League average

In [None]:
rb_lg_sum = defense_df.groupby(['season', 'date'])['trb'].sum().groupby(['season']).shift(1)
rb_lg_sum = rb_lg_sum.groupby(['season']).expanding(min_periods=5).sum()
rb_lg_count = defense_df.groupby(['season', 'date'])['trb'].count().groupby(['season']).shift(1)
rb_lg_count = rb_lg_count.groupby(['season']).expanding(min_periods=5).sum()
rb_lg_avg = rb_lg_sum / rb_lg_count
rb_lg_avg.index = defense_df.groupby(['season', 'date'])['trb'].sum().index
rb_lg_avg = rb_lg_avg.reset_index()
defense_df = pd.merge(defense_df, rb_lg_avg, on = ['season', 'date'], how = 'left', suffixes = ('', '_lg_avg'))


Merge in opposing defense

In [None]:
df = pd.merge(
    df, 
    defense_df[['defense', 'date', 'trb_mean', 'trb_lg_avg']], 
    left_on = ['opponent', 'date'], 
    right_on = ['defense', 'date'], 
    how = 'left', 
    suffixes = ('', '_opp')
)

Function to calculate EV

In [None]:
def expected_value(prob, odds):
    return prob * odds - (1 - prob)

Player Analysis

In [None]:
def player_analysis(player, date, df=df):
    # Normalize player name
    player = unidecode(player.strip().lower())

    # Edge cases where there a difference in player names
    edge_cases = {
        'bojan bogdanovich': 'bojan bogdanovic', 'caldwell pope kentavious': 'kentavious caldwell-pope',
        'pj tucker': 'p.j. tucker', 'robert williams iii': 'robert williams',
        'wendell carter': 'wendell carter jr.', 'christian james mccollum': 'cj mccollum',
        'grant jerami': 'jerami grant', 'huerter kevin': 'kevin huerter', 'p. j. tucker': 'p.j. tucker',
        'p.j tucker': 'p.j. tucker', 'nicolas claxton': 'nic claxton', 'cam johnson': 'cameron johnson',
        'rj barrett jr.': 'rj barrett', 'jimmy butler iii': 'jimmy butler', 'dorian finney smith': 'dorian finney-smith',
        'mike conley jr.': 'mike conley', 'anderson kyle': 'kyle anderson', 'jabari smith': 'jabari smith jr.', 
        'kj martin': 'kenyon martin jr.', 'jabari smith jr': 'jabari smith jr.', 'martin jr. k.': 'kenyon martin jr.',
        'alfred joel horford reynoso': 'al horford', 'j. tate': "jae'sean tate", 'anthony davis jr.': 'anthony davis',
        'murray dejounte': 'dejounte murray', 'brook robert lopez': 'brook lopez', 'og anunoby jr.': 'og anunoby',
        'marcus morris sr.': 'marcus morris', 'morris marcus': 'marcus morris', 'kelly oubre jr': 'kelly oubre jr.',
        'kelly oubre': 'kelly oubre jr.', 'dennis smith jr': 'dennis smith jr.', 'trendon watford (por)': 'trendon watford',
        'cameron thomas': 'cam thomas', 'durant kevin': 'kevin durant', 'xavier tillman': 'xavier tillman sr.',
        'k. caldwell-pope': 'kentavious caldwell-pope', 'michael porter': 'michael porter jr.', 'trey murphy': 'trey murphy iii',
        'troy brown jr': 'troy brown jr.', 'troy brown': 'troy brown jr.', 'marvin bagley': 'marvin bagley iii',
        'livers isaiah': 'isaiah livers', 'k. middleton': 'khris middleton', 'portis, bobby': 'bobby portis', 'bobby portis jr.': 'bobby portis',
        'pj washington': 'p.j. washington', 'p.j washington': 'p.j. washington', 'jabari smith ii': 'jabari smith jr.', 
        'kevin porter jr': 'kevin porter jr.', 'kevin porter': 'kevin porter jr.', 's. gilgeous-alexander': 'shai gilgeous-alexander', 
        'v. oladipo': 'victor oladipo', 'tim hardaway': 'tim hardaway jr.', 'westbrook russell': 'russell westbrook'
    }
    player = edge_cases.get(player, player)
    print(player)
    
    player_df = df.loc[(df['player'] == player) & (df['date'] <= date)].sort_values(by=['date'], ascending=True).copy()

    # Keep relevant team data (addresses trades)
    player_df.loc[:, 'team'] = np.where(player_df['team'] == 1, player_df['home'], player_df['visitor'])
    most_recent_team = player_df.loc[player_df['date'] == player_df['date'].max(), 'team'].values[0]
    player_df = player_df.loc[player_df['team'] == most_recent_team, :]

    # Keep games where player played
    player_df = player_df[player_df['pt'] != '0']

    # Add row to hole next game predictions
    player_df.loc['next_game', :] = None

    # Shift opposing defense and league average down a game
    player_df.loc[:, 'trb_mean'] = player_df.loc[:, 'trb_mean'].shift(1)
    player_df.loc[:, 'trb_lg_avg'] = player_df.loc[:, 'trb_lg_avg'].shift(1)

    # Cumulative moving average for rebounds
    player_df['rb'] = player_df['trb'].shift(1) 
    player_df['rb_sum'] = player_df['rb'].expanding(min_periods=5).sum().values
    player_df['rb_count'] = player_df['rb'].expanding(min_periods=5).count().values
    player_df['rb_mean'] = player_df['rb'].expanding(min_periods=5).mean().values

    return player_df


Normalize for opposing defense

In [None]:
def normalize(player_df):
    # Select next game
    mu_df = player_df.iloc[-1, :][['rb_mean', 'trb_mean', 'trb_lg_avg']]

    # # Normalize data
    normalize = 1 + (mu_df['trb_mean'] - mu_df['trb_lg_avg']) / mu_df['trb_lg_avg']
    mu = mu_df['rb_mean'] * normalize

    return mu

Calculate odds

In [None]:
def calculate_expected_value(projection, type, line, price):
    # Poisson distribution probability
    under = stats.poisson.cdf(k = line, mu = projection)
    over = 1 - under

    # Expected value
    if type == 'Over':
        ev = expected_value(over, convert_odds_to_perc(price))
    else:
        ev = expected_value(under, convert_odds_to_perc(price))
    
    return round(ev * 100, 1)



Load in player prop lines for date

In [None]:
next_game_date = date(2023, 3, 8)

In [None]:
home_teams = schedules.loc[schedules['date'] == str(next_game_date), 'home'].to_list()
away_teams = schedules.loc[schedules['date'] == str(next_game_date), 'visitor'].to_list()

In [None]:
player_props = pd.read_csv("backend/data/odds/player_props/rebounds.csv")
player_props['last_updated'] = pd.to_datetime(player_props.loc[:, 'last_updated'])
player_props = player_props.loc[
    (player_props['last_updated'].dt.date == next_game_date) & 
    (player_props['home'].isin(home_teams)) & 
    (player_props['away'].isin(away_teams))
]

Calculate EV and projection for each player and book

In [None]:
player_props['projection'] = player_props.apply(
    lambda row: normalize(player_analysis(row.player, str(next_game_date))),
    axis=1
)
player_props['EV'] = player_props.apply(
    lambda row: calculate_expected_value(row.projection, row.type, row.line, row.price),
    axis=1
)

daniel gafford
daniel gafford
trae young
trae young
clint capela
clint capela
bradley beal
bradley beal
john collins
john collins
kyle kuzma
kyle kuzma
kristaps porzingis
kristaps porzingis
dejounte murray
dejounte murray
de'andre hunter
de'andre hunter
bradley beal
bradley beal
clint capela
clint capela
daniel gafford
daniel gafford
de'andre hunter
de'andre hunter
dejounte murray
dejounte murray
john collins
john collins
kristaps porzingis
kristaps porzingis
kyle kuzma
kyle kuzma
trae young
trae young
bogdan bogdanovic
bogdan bogdanovic
bradley beal
bradley beal
clint capela
clint capela
daniel gafford
daniel gafford
de'andre hunter
de'andre hunter
dejounte murray
dejounte murray
john collins
john collins
kristaps porzingis
kristaps porzingis
kyle kuzma
kyle kuzma
onyeka okongwu
onyeka okongwu
saddiq bey
saddiq bey
trae young
trae young
clint capela
clint capela
de'andre hunter
de'andre hunter
dejounte murray
dejounte murray
trae young
trae young
kyle kuzma
kyle kuzma
daniel gafford
d

Players with +EV

In [None]:
pd.set_option('display.max_rows', None)
player_props.loc[player_props['EV'] >= 10, :].sort_values(by=['date', 'home', 'player', 'book'])

Unnamed: 0,date,home,away,player,type,line,price,book,last_updated,projection,EV
7706,2023-03-09T00:10:00Z,Washington Wizards,Atlanta Hawks,John Collins,Over,4.5,110,BetMGM,2023-03-08 19:20:17+00:00,6.49127,14.4
7767,2023-03-09T00:10:00Z,Washington Wizards,Atlanta Hawks,John Collins,Over,4.5,110,BetOnline.ag,2023-03-08 19:22:05+00:00,6.49127,14.4
7742,2023-03-09T00:10:00Z,Washington Wizards,Atlanta Hawks,John Collins,Over,4.5,120,Bovada,2023-03-08 19:25:16+00:00,6.49127,12.8
7678,2023-03-09T00:10:00Z,Washington Wizards,Atlanta Hawks,John Collins,Over,4.5,110,DraftKings,2023-03-08 19:24:12+00:00,6.49127,14.4
7638,2023-03-09T00:10:00Z,Washington Wizards,Atlanta Hawks,John Collins,Over,4.5,122,FanDuel,2023-03-08 19:24:57+00:00,6.49127,12.4
7658,2023-03-09T00:10:00Z,Washington Wizards,Atlanta Hawks,John Collins,Over,4.5,115,PointsBet (US),2023-03-08 19:22:47+00:00,6.49127,13.6
7714,2023-03-09T00:10:00Z,Washington Wizards,Atlanta Hawks,John Collins,Over,4.5,112,William Hill (US),2023-03-08 19:24:40+00:00,6.49127,14.1
7746,2023-03-09T00:10:00Z,Washington Wizards,Atlanta Hawks,Kyle Kuzma,Over,5.5,-150,Bovada,2023-03-08 19:25:16+00:00,7.489203,21.2
7640,2023-03-09T00:10:00Z,Washington Wizards,Atlanta Hawks,Kyle Kuzma,Over,5.5,-142,FanDuel,2023-03-08 19:24:57+00:00,7.489203,20.2
7662,2023-03-09T00:10:00Z,Washington Wizards,Atlanta Hawks,Kyle Kuzma,Over,5.5,-145,PointsBet (US),2023-03-08 19:22:47+00:00,7.489203,20.6
