# Rebounds

In [1]:
import os
import pandas as pd
import numpy as np
import scipy.stats as stats
from unidecode import unidecode
from datetime import date

In [2]:
os.chdir('/home/tylerengland/NBA')
os.getcwd()

'/home/tylerengland/NBA'

### Load Data

In [3]:
df = pd.read_csv('backend/data/details/game_details.csv')

In [4]:
df.head()

Unnamed: 0,date,visitor,home,team,starter,player,mp,fg,fga,fg_perc,...,drb,trb,ast,stl,blk,tov,pf,pts,plus_minus,season
0,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,1.0,Kirk Hinrich,34:38:00,10.0,18.0,0.556,...,4.0,4.0,3.0,2.0,0.0,0.0,2.0,26.0,23.0,
1,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,1.0,Ben Wallace,28:29:00,2.0,5.0,0.4,...,5.0,11.0,1.0,0.0,1.0,0.0,2.0,5.0,13.0,
2,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,1.0,Luol Deng,24:07:00,4.0,9.0,0.444,...,1.0,2.0,1.0,1.0,0.0,3.0,2.0,12.0,8.0,
3,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,1.0,Ben Gordon,22:56,1.0,9.0,0.111,...,1.0,1.0,2.0,1.0,0.0,2.0,3.0,6.0,7.0,
4,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,1.0,P.J. Brown,18:23,1.0,2.0,0.5,...,3.0,3.0,1.0,1.0,1.0,2.0,3.0,4.0,15.0,


In [5]:
df.columns

Index(['date', 'visitor', 'home', 'team', 'starter', 'player', 'mp', 'fg',
       'fga', 'fg_perc', '3p', '3pa', '3p_perc', 'ft', 'fta', 'ft_perc', 'orb',
       'drb', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts', 'plus_minus',
       'season'],
      dtype='object')

Drop duplicates and NA's

In [6]:
df.drop_duplicates(subset=['date', 'visitor', 'home', 'team', 'player'], inplace=True)

In [7]:
df.dropna(subset = ['player'], inplace = True)

In [8]:
df.isna().sum()

date               0
visitor            0
home               0
team               0
starter            0
player             0
mp                 0
fg                 0
fga                0
fg_perc        22791
3p                 0
3pa                0
3p_perc       156958
ft                 0
fta                0
ft_perc       189274
orb                0
drb                0
trb                0
ast                0
stl                0
blk                0
tov                0
pf                 0
pts                0
plus_minus        79
season        532543
dtype: int64

Clean data

In [9]:
df['fg_perc'] = df['fg'] / df['fga']
df['3p_perc'] = df['3p'] / df['3pa']
df['ft_perc'] = df['ft'] / df['fta']
df['min'] = df['mp'].apply(lambda x: int(x.split(':')[0]))
df['sec'] = df['mp'].apply(lambda x: int(x.split(':')[-1]))
df['pt'] = df['min'] + df['sec'] / 60
df['date'] = pd.to_datetime(df['date'])
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['player'] = df['player'].apply(lambda x: unidecode(x.strip().lower()))

In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 538673 entries, 0 to 539288
Data columns (total 32 columns):
 #   Column      Non-Null Count   Dtype         
---  ------      --------------   -----         
 0   date        538673 non-null  datetime64[ns]
 1   visitor     538673 non-null  object        
 2   home        538673 non-null  object        
 3   team        538673 non-null  int64         
 4   starter     538673 non-null  float64       
 5   player      538673 non-null  object        
 6   mp          538673 non-null  object        
 7   fg          538673 non-null  float64       
 8   fga         538673 non-null  float64       
 9   fg_perc     425205 non-null  float64       
 10  3p          538673 non-null  float64       
 11  3pa         538673 non-null  float64       
 12  3p_perc     291038 non-null  float64       
 13  ft          538673 non-null  float64       
 14  fta         538673 non-null  float64       
 15  ft_perc     258722 non-null  float64       
 16  or

Feature Engineer

In [11]:
x = df.groupby(['date', 'visitor', 'home', 'team'])[['fga', 'orb', 'tov', 'fta']].transform('sum')
df['possessions'] = x.fga - x.orb + x.tov + 0.4 * x.fta

In [12]:
def get_season(month, year):
    if 10 <= month <= 12:
        return year
    else:
        return year - 1

In [13]:
df['season'] = df.apply(lambda x: get_season(x.month, x.year), 1)
df['opponent'] = np.where(df['team'], df['visitor'], df['home'])

Merge Schedule

In [14]:
schedules = pd.read_csv("backend/data/schedules/2022.csv")
schedules['date'] = pd.to_datetime(schedules['date'])

home_schedule = schedules.copy()
home_schedule['team'] = 1
home_schedule['opponent'] = home_schedule['visitor']

visitor_schedule = schedules.copy()
visitor_schedule['team'] = 0
visitor_schedule['opponent'] = visitor_schedule['home']

schedules = pd.concat([home_schedule, visitor_schedule])

df = pd.merge(schedules, df, left_on = ['date', 'visitor', 'home', 'team', 'opponent'], right_on = ['date', 'visitor', 'home', 'team', 'opponent'], how = 'left')

Teams Defense

In [15]:
df.columns

Index(['date', 'visitor', 'home', 'season_x', 'team', 'opponent', 'starter',
       'player', 'mp', 'fg', 'fga', 'fg_perc', '3p', '3pa', '3p_perc', 'ft',
       'fta', 'ft_perc', 'orb', 'drb', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf',
       'pts', 'plus_minus', 'season_y', 'min', 'sec', 'pt', 'year', 'month',
       'possessions'],
      dtype='object')

In [16]:
df['defense'] = np.where(df['team'], df['visitor'], df['home'])
defense_df = df.groupby(['date', 'defense'])[['trb']].sum().reset_index()

In [17]:
df['defense'] = np.where(df['team'], df['visitor'], df['home'])
defense_df = df.groupby(['date', 'defense'])[['trb']].sum().reset_index()

Feature engineer for defense

In [18]:
defense_df['date'] = pd.to_datetime(defense_df['date'])
defense_df['year'] = defense_df['date'].dt.year
defense_df['month'] = defense_df['date'].dt.month
defense_df['season'] = defense_df.apply(lambda x: get_season(x.month, x.year), 1)

Cumulative moving average for teams 

In [19]:
defense_df['rb'] = defense_df.groupby(['season', 'defense'])['trb'].shift(1)
defense_df['trb_sum'] = defense_df.groupby(['season', 'defense'])['rb'].expanding(min_periods=5).sum().sort_index(axis = 0, level = 2).values
defense_df['trb_count'] = defense_df.groupby(['season', 'defense'])['rb'].expanding(min_periods=5).count().sort_index(axis = 0, level = 2).values
defense_df['trb_mean'] = defense_df.groupby(['season', 'defense'])['rb'].expanding(min_periods=5).mean().sort_index(axis = 0, level = 2).values

League average

In [20]:
rb_lg_sum = defense_df.groupby(['season', 'date'])['trb'].sum().groupby(['season']).shift(1)
rb_lg_sum = rb_lg_sum.groupby(['season']).expanding(min_periods=5).sum()
rb_lg_count = defense_df.groupby(['season', 'date'])['trb'].count().groupby(['season']).shift(1)
rb_lg_count = rb_lg_count.groupby(['season']).expanding(min_periods=5).sum()
rb_lg_avg = rb_lg_sum / rb_lg_count
rb_lg_avg.index = defense_df.groupby(['season', 'date'])['trb'].sum().index
rb_lg_avg = rb_lg_avg.reset_index()
defense_df = pd.merge(defense_df, rb_lg_avg, on = ['season', 'date'], how = 'left', suffixes = ('', '_lg_avg'))


Merge in opposing defense

In [21]:
df = pd.merge(
    df, 
    defense_df[['defense', 'date', 'trb_mean', 'trb_lg_avg']], 
    left_on = ['opponent', 'date'], 
    right_on = ['defense', 'date'], 
    how = 'left', 
    suffixes = ('', '_opp')
)

Player Analysis

In [22]:
def player_analysis(player, date, df=df):
    # Normalize player name
    player = unidecode(player.strip().lower())

    # Edge cases where there a difference in player names
    edge_cases = {
        'bojan bogdanovich': 'bojan bogdanovic', 'caldwell pope kentavious': 'kentavious caldwell-pope',
        'pj tucker': 'p.j. tucker', 'robert williams iii': 'robert williams',
        'wendell carter': 'wendell carter jr.', 'christian james mccollum': 'cj mccollum',
        'grant jerami': 'jerami grant', 'huerter kevin': 'kevin huerter', 'p. j. tucker': 'p.j. tucker',
        'p.j tucker': 'p.j. tucker', 'nicolas claxton': 'nic claxton', 'cam johnson': 'cameron johnson',
        'rj barrett jr.': 'rj barrett', 'jimmy butler iii': 'jimmy butler', 'dorian finney smith': 'dorian finney-smith',
        'mike conley jr.': 'mike conley', 'anderson kyle': 'kyle anderson', 'jabari smith': 'jabari smith jr.', 
        'kj martin': 'kenyon martin jr.', 'jabari smith jr': 'jabari smith jr.', 'martin jr. k.': 'kenyon martin jr.',
        'alfred joel horford reynoso': 'al horford', 'j. tate': "jae'sean tate", 'anthony davis jr.': 'anthony davis',
        'murray dejounte': 'dejounte murray', 'brook robert lopez': 'brook lopez', 'og anunoby jr.': 'og anunoby',
        'marcus morris sr.': 'marcus morris', 'morris marcus': 'marcus morris', 'kelly oubre jr': 'kelly oubre jr.',
        'kelly oubre': 'kelly oubre jr.', 'dennis smith jr': 'dennis smith jr.', 'trendon watford (por)': 'trendon watford',
        'cameron thomas': 'cam thomas', 'durant kevin': 'kevin durant', 'xavier tillman': 'xavier tillman sr.',
        'k. caldwell-pope': 'kentavious caldwell-pope', 'michael porter': 'michael porter jr.', 'trey murphy': 'trey murphy iii',
        'troy brown jr': 'troy brown jr.', 'troy brown': 'troy brown jr.', 'marvin bagley': 'marvin bagley iii',
        'livers isaiah': 'isaiah livers', 'k. middleton': 'khris middleton', 'portis, bobby': 'bobby portis', 'bobby portis jr.': 'bobby portis',
        'pj washington': 'p.j. washington', 'p.j washington': 'p.j. washington', 'jabari smith ii': 'jabari smith jr.', 
        'kevin porter jr': 'kevin porter jr.', 'kevin porter': 'kevin porter jr.', 's. gilgeous-alexander': 'shai gilgeous-alexander', 
        'v. oladipo': 'victor oladipo', 'tim hardaway': 'tim hardaway jr.', 'westbrook russell': 'russell westbrook', 
        'p.j. washington jr.': 'p.j. washington', 'g. antetokounmpo': 'giannis antetokounmpo'
    }
    player = edge_cases.get(player, player)
    print(player)
    
    player_df = df.loc[(df['player'] == player) & (df['date'] <= date)].sort_values(by=['date'], ascending=True).copy()

    # Keep relevant team data (addresses trades)
    player_df.loc[:, 'team'] = np.where(player_df['team'] == 1, player_df['home'], player_df['visitor'])
    most_recent_team = player_df.loc[player_df['date'] == player_df['date'].max(), 'team'].values[0]
    player_df = player_df.loc[player_df['team'] == most_recent_team, :]

    # Keep games where player played
    player_df = player_df[player_df['pt'] != '0']

    # Add row to hole next game predictions
    player_df.loc['next_game', :] = None

    # Shift opposing defense and league average down a game
    player_df.loc[:, 'trb_mean'] = player_df.loc[:, 'trb_mean'].shift(1)
    player_df.loc[:, 'trb_lg_avg'] = player_df.loc[:, 'trb_lg_avg'].shift(1)

    # Cumulative moving average for rebounds
    player_df['rb'] = player_df['trb'].shift(1) 
    player_df['rb_sum'] = player_df['rb'].expanding(min_periods=5).sum().values
    player_df['rb_count'] = player_df['rb'].expanding(min_periods=5).count().values
    player_df['rb_mean'] = player_df['rb'].expanding(min_periods=5).mean().values

    return player_df


Normalize for opposing defense

In [23]:
def normalize(player_df):
    # Select next game
    mu_df = player_df.iloc[-1, :][['rb_mean', 'trb_mean', 'trb_lg_avg']]

    # # Normalize data
    normalize = 1 + (mu_df['trb_mean'] - mu_df['trb_lg_avg']) / mu_df['trb_lg_avg']
    mu = mu_df['rb_mean'] * normalize

    return mu

Function to calculate implied odds

In [24]:
def convert_odds_to_perc(price):
    if price < 0:
        return abs(price) / (100 + abs(price))
    else:
        return 100 / (100 + price)

Function to calculate EV


In [25]:
def expected_value(prob, odds):
    return prob * odds - (1 - prob)

Calculate EV

In [26]:
def calculate_expected_value(projection, type, line, price):
    # Poisson distribution probability
    under = stats.poisson.cdf(k = line, mu = projection)
    over = 1 - under

    # Expected value
    if type == 'Over':
        ev = expected_value(over, convert_odds_to_perc(price))
    else:
        ev = expected_value(under, convert_odds_to_perc(price))
    
    return round(ev * 100, 1)



Load in player prop lines for date

In [27]:
next_game_date = date(2023, 3, 11)

In [28]:
home_teams = schedules.loc[schedules['date'] == str(next_game_date), 'home'].to_list()
away_teams = schedules.loc[schedules['date'] == str(next_game_date), 'visitor'].to_list()

In [29]:
player_props = pd.read_csv("backend/data/odds/player_props/rebounds.csv")
player_props['last_updated'] = pd.to_datetime(player_props.loc[:, 'last_updated'])
player_props = player_props.loc[
    (player_props['last_updated'].dt.date == next_game_date) & 
    (player_props['home'].isin(home_teams)) & 
    (player_props['away'].isin(away_teams))
]

Calculate EV and projection for each player and book

In [30]:
player_props['projection'] = player_props.apply(
    lambda row: normalize(player_analysis(row.player, str(next_game_date))),
    axis=1
)
player_props['EV'] = player_props.apply(
    lambda row: calculate_expected_value(row.projection, row.type, row.line, row.price),
    axis=1
)

immanuel quickley
immanuel quickley
ivica zubac
ivica zubac
josh hart
josh hart
julius randle
julius randle
kawhi leonard
kawhi leonard
marcus morris
marcus morris
mason plumlee
mason plumlee
mitchell robinson
mitchell robinson
paul george
paul george
quentin grimes
quentin grimes
rj barrett
rj barrett
russell westbrook
russell westbrook
terance mann
terance mann
kawhi leonard
kawhi leonard
paul george
paul george
russell westbrook
russell westbrook
ivica zubac
ivica zubac
julius randle
julius randle
mitchell robinson
mitchell robinson
quentin grimes
quentin grimes
rj barrett
rj barrett
immanuel quickley
immanuel quickley
marcus morris
marcus morris
eric gordon
eric gordon
josh hart
josh hart
terance mann
terance mann
mason plumlee
mason plumlee
immanuel quickley
immanuel quickley
terance mann
terance mann
ivica zubac
ivica zubac
josh hart
josh hart
marcus morris
marcus morris
russell westbrook
russell westbrook
julius randle
julius randle
kawhi leonard
kawhi leonard
quentin grimes
que

Players with +EV

In [31]:
pd.set_option('display.max_rows', None)
player_props.loc[player_props['EV'] >= 10, :].sort_values(by=['date', 'home', 'player', 'book']).loc[:, ['home', 'away', 'player', 'type', 'line', 'price', 'projection', 'EV']]

Unnamed: 0,home,away,player,type,line,price,projection,EV
9890,Los Angeles Clippers,New York Knicks,Ivica Zubac,Over,7.5,-120,9.708282,16.3
10015,Los Angeles Clippers,New York Knicks,Ivica Zubac,Over,7.5,-130,9.708282,17.8
9964,Los Angeles Clippers,New York Knicks,Ivica Zubac,Over,7.5,-130,9.708282,17.8
9860,Los Angeles Clippers,New York Knicks,Ivica Zubac,Over,7.5,-125,9.708282,17.0
9916,Los Angeles Clippers,New York Knicks,Ivica Zubac,Over,7.5,-122,9.708282,16.6
9988,Los Angeles Clippers,New York Knicks,Ivica Zubac,Over,7.5,-125,9.708282,17.0
9938,Los Angeles Clippers,New York Knicks,Ivica Zubac,Over,7.5,-117,9.708282,15.8
10042,Los Angeles Clippers,New York Knicks,Ivica Zubac,Over,7.5,-125,9.708282,17.0
9920,Los Angeles Clippers,New York Knicks,Marcus Morris,Over,2.5,-102,3.90404,12.5
9996,Los Angeles Clippers,New York Knicks,Marcus Morris,Over,2.5,100,3.90404,12.1
