# Rebounds

In [409]:
import os
import pandas as pd
import numpy as np
import scipy.stats as stats
from unidecode import unidecode
from datetime import date

In [410]:
os.chdir('/home/tylerengland/NBA')
os.getcwd()

'/home/tylerengland/NBA'

### Load Data

In [411]:
df = pd.read_csv('backend/data/details/game_details.csv')

In [412]:
df.head()

Unnamed: 0,date,visitor,home,team,starter,player,mp,fg,fga,fg_perc,...,drb,trb,ast,stl,blk,tov,pf,pts,plus_minus,season
0,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,1.0,Kirk Hinrich,34:38,10.0,18.0,0.556,...,4.0,4.0,3.0,2.0,0.0,0.0,2.0,26.0,23.0,
1,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,1.0,Ben Wallace,28:29,2.0,5.0,0.4,...,5.0,11.0,1.0,0.0,1.0,0.0,2.0,5.0,13.0,
2,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,1.0,Luol Deng,24:07,4.0,9.0,0.444,...,1.0,2.0,1.0,1.0,0.0,3.0,2.0,12.0,8.0,
3,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,1.0,Ben Gordon,22:56,1.0,9.0,0.111,...,1.0,1.0,2.0,1.0,0.0,2.0,3.0,6.0,7.0,
4,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,1.0,P.J. Brown,18:23,1.0,2.0,0.5,...,3.0,3.0,1.0,1.0,1.0,2.0,3.0,4.0,15.0,


In [413]:
df.columns

Index(['date', 'visitor', 'home', 'team', 'starter', 'player', 'mp', 'fg',
       'fga', 'fg_perc', '3p', '3pa', '3p_perc', 'ft', 'fta', 'ft_perc', 'orb',
       'drb', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts', 'plus_minus',
       'season'],
      dtype='object')

Drop duplicates and NA's

In [414]:
df.drop_duplicates(subset=['date', 'visitor', 'home', 'team', 'player'], inplace=True)

In [415]:
df.dropna(subset = ['player'], inplace = True)

In [416]:
df.isna().sum()

date               0
visitor            0
home               0
team               0
starter            0
player             0
mp                 0
fg                 0
fga                0
fg_perc        22692
3p                 0
3pa                0
3p_perc       156463
ft                 0
fta                0
ft_perc       188212
orb                0
drb                0
trb                0
ast                0
stl                0
blk                0
tov                0
pf                 0
pts                0
plus_minus        80
season        535562
dtype: int64

Clean data

In [417]:
df['fg_perc'] = df['fg'] / df['fga']
df['3p_perc'] = df['3p'] / df['3pa']
df['ft_perc'] = df['ft'] / df['fta']
df['min'] = df['mp'].apply(lambda x: int(x.split(':')[0]))
df['sec'] = df['mp'].apply(lambda x: int(x.split(':')[-1]))
df['pt'] = df['min'] + df['sec'] / 60
df['date'] = pd.to_datetime(df['date'])
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['player'] = df['player'].apply(lambda x: unidecode(x.strip().lower()))

In [418]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 535640 entries, 0 to 536857
Data columns (total 32 columns):
 #   Column      Non-Null Count   Dtype         
---  ------      --------------   -----         
 0   date        535640 non-null  datetime64[ns]
 1   visitor     535640 non-null  object        
 2   home        535640 non-null  object        
 3   team        535640 non-null  int64         
 4   starter     535640 non-null  float64       
 5   player      535640 non-null  object        
 6   mp          535640 non-null  object        
 7   fg          535640 non-null  float64       
 8   fga         535640 non-null  float64       
 9   fg_perc     422832 non-null  float64       
 10  3p          535640 non-null  float64       
 11  3pa         535640 non-null  float64       
 12  3p_perc     289061 non-null  float64       
 13  ft          535640 non-null  float64       
 14  fta         535640 non-null  float64       
 15  ft_perc     257312 non-null  float64       
 16  or

Feature Engineer

In [419]:
x = df.groupby(['date', 'visitor', 'home', 'team'])[['fga', 'orb', 'tov', 'fta']].transform('sum')
df['possessions'] = x.fga - x.orb + x.tov + 0.4 * x.fta

In [420]:
def get_season(month, year):
    if 10 <= month <= 12:
        return year
    else:
        return year - 1

In [421]:
df['season'] = df.apply(lambda x: get_season(x.month, x.year), 1)
df['opponent'] = np.where(df['team'], df['visitor'], df['home'])

Merge Schedule

In [422]:
schedules = pd.read_csv("backend/data/schedules/2022.csv", index_col = 0)
schedules['date'] = pd.to_datetime(schedules['date'])

home_schedule = schedules.copy()
home_schedule['team'] = 1
home_schedule['opponent'] = home_schedule['visitor']

visitor_schedule = schedules.copy()
visitor_schedule['team'] = 0
visitor_schedule['opponent'] = visitor_schedule['home']

schedules = pd.concat([home_schedule, visitor_schedule])

df = pd.merge(schedules, df, left_on = ['date', 'visitor', 'home', 'team', 'opponent'], right_on = ['date', 'visitor', 'home', 'team', 'opponent'], how = 'left')

Teams Defense

In [423]:
defense_df = pd.read_csv('backend/data/totals/game_totals.csv', index_col = 0)
defense_df['date'] = pd.to_datetime(defense_df['date'])

defense_df = pd.merge(schedules, defense_df, left_on = ['date', 'visitor', 'home', 'team'], right_on = ['date', 'visitor', 'home', 'team'], how = 'left')
defense_df.drop_duplicates(subset = ['date', 'visitor', 'home', 'team'], inplace = True)

Feature engineer for defense

In [424]:
defense_df['team'] = np.where(defense_df['team'], defense_df['visitor'], defense_df['home'])
defense_df['date'] = pd.to_datetime(defense_df['date'])
defense_df['year'] = defense_df['date'].dt.year
defense_df['month'] = defense_df['date'].dt.month
defense_df['season'] = defense_df.apply(lambda x: get_season(x.month, x.year), 1)

Cumulative moving average for teams 

In [425]:
defense_df['rb'] = defense_df.groupby(['season', 'team'])['trb'].shift(1)
defense_df['trb_sum'] = defense_df.groupby(['season', 'team'])['rb'].expanding(5).sum().sort_index(axis = 0, level = 2).values
defense_df['trb_count'] = defense_df.groupby(['season', 'team'])['rb'].expanding(5).count().sort_index(axis = 0, level = 2).values
defense_df['trb_mean'] = defense_df.groupby(['season', 'team'])['rb'].expanding(5).mean().sort_index(axis = 0, level = 2).values

League average

In [426]:
rb_lg_sum = defense_df.groupby(['season', 'date'])['trb'].sum().groupby(['season']).shift(1)
rb_lg_sum = rb_lg_sum.groupby(['season']).expanding(1).sum()
rb_lg_count = defense_df.groupby(['season', 'date'])['trb'].count().groupby(['season']).shift(1)
rb_lg_count = rb_lg_count.groupby(['season']).expanding(1).sum()
rb_lg_avg = rb_lg_sum / rb_lg_count
rb_lg_avg.index = defense_df.groupby(['season', 'date'])['trb'].sum().index
rb_lg_avg = rb_lg_avg.reset_index()
defense_df = pd.merge(defense_df, rb_lg_avg, on = ['season', 'date'], how = 'left', suffixes = ('', '_lg_avg'))


Merge in opposing defense

In [427]:
df = pd.merge(
    df, 
    defense_df[['team', 'date', 'trb_mean', 'trb_lg_avg']], 
    left_on = ['opponent', 'date'], 
    right_on = ['team', 'date'], 
    how = 'left', 
    suffixes = ('', '_opp')
)

Functions to convert odds

In [428]:
def convert_perc_to_odds(perc):
    if perc > 0.5:
        return round((100 * perc) / (1 - perc)) * -1
    else:
        return round((1 - perc) * 100 / perc)


In [429]:
def convert_odds_to_perc(odds):
    if odds < 0:
        return round(abs(odds) / (abs(odds) + 100), 3)
    else:
        return round(100 / (abs(odds) + 100), 3)

Function to calculate EV

In [430]:
def expected_value(prob, odds):
    return prob * odds - (1 - prob)

Player Analysis

In [431]:
def player_analysis(player, date, df=df):
    player_df = df.loc[(df['player'] == player) & (df['date'] <= date)].sort_values(by=['date'], ascending=True).copy()

    # Keep relevant team data (addresses trades)
    player_df.loc[:, 'team'] = np.where(player_df['team'] == 1, player_df['home'], player_df['visitor'])
    most_recent_team = player_df.loc[player_df['date'] == player_df['date'].max(), 'team'].values[0]
    player_df = player_df.loc[player_df['team'] == most_recent_team, :]

    # Keep games where player played
    player_df = player_df[player_df['pt'] != '0']

    # Add row to hole next game predictions
    player_df.loc['next_game', :] = None

    # Shift opposing defense and league average down a game
    player_df.loc[:, 'trb_mean'] = player_df.loc[:, 'trb_mean'].shift(1)
    player_df.loc[:, 'trb_lg_avg'] = player_df.loc[:, 'trb_lg_avg'].shift(1)

    # Cumulative moving average for rebounds per min
    player_df['rb'] = player_df['trb'].shift(1) / player_df['pt'].shift(1)
    player_df['rb_sum'] = player_df['rb'].expanding(1).sum().values
    player_df['rb_count'] = player_df['rb'].expanding(1).count().values
    player_df['rb_mean'] = player_df['rb'].expanding(1).mean().values

    return player_df


Load minute projections

In [432]:
def load_minute_projections():
    minute_projections = pd.read_csv(
        'backend/data/rotowire-nba-projections.csv', 
        header=1, 
        usecols=[0, 4], 
        names=['player', 'min_proj']
    )
    minute_projections = dict(zip(minute_projections.loc[:, 'player'].str.lower(), minute_projections.loc[:, 'min_proj']))

    return minute_projections

Apply minute projections and normalize projected rebounds

In [433]:
def project_and_normalize(player, player_df):
    # Select next game
    mu_df = player_df.iloc[-1, :][['rb_mean', 'trb_mean', 'trb_lg_avg']]

    # Load minute projections
    minute_projections = load_minute_projections()

    # Edge cases where there a difference in player names
    minutes_players = {
        'wendell carter jr.': 'wendell carter', 'jaren jackson jr.': 'jaren jackson', 
        'michael porter jr.': 'michael porter', 'trey murphy iii': 'trey murphy',
    }

    # Apply minute projections to season average per min
    mu_df['rb_mean'] = mu_df['rb_mean'] * minute_projections[unidecode(minutes_players.get(player, player))]

    # Normalize data
    normalize = 1 + (mu_df['trb_mean'] - mu_df['trb_lg_avg']) / mu_df['trb_lg_avg']
    mu = mu_df['rb_mean'] * normalize

    return mu

Calculate odds

In [434]:
def calculate_expected_value(player, today, type, line, price):

    player = unidecode(player.strip().lower())

    # Edge cases where there a difference in player names
    edge_cases = {
        'bojan bogdanovich': 'bojan bogdanovic', 'caldwell pope kentavious': 'kentavious caldwell-pope',
        'pj tucker': 'p.j. tucker', 'robert williams iii': 'robert williams',
        'wendell carter': 'wendell carter jr.', 'christian james mccollum': 'cj mccollum',
        'grant jerami': 'jerami grant', 'huerter kevin': 'kevin huerter', 'p. j. tucker': 'p.j. tucker',
        'p.j tucker': 'p.j. tucker'
    }
    player = edge_cases.get(player, player)

    player_df = player_analysis(player, today)
    mu = project_and_normalize(player, player_df)

    # Poisson distribution probability
    under = stats.poisson.cdf(k = line, mu = mu)
    over = 1 - under

    # Expected value
    if type == 'Over':
        ev = expected_value(over, convert_odds_to_perc(price))
    else:
        ev = expected_value(under, convert_odds_to_perc(price))
    
    return round(ev * 100, 1)



Load in player prop lines for date

In [435]:
next_game_date = date(2023, 2, 24)
next_game_str = "2023-02-23"

In [436]:
player_props = pd.read_csv("backend/data/odds/player_props/rebounds.csv")
player_props['date'] = pd.to_datetime(player_props.loc[:, 'date'])
player_props = player_props.loc[player_props['date'].dt.date == next_game_date]

Calculate EV for each player and book

In [438]:
input_columns = ['player', 'type', 'line', 'price']
player_props['EV'] = player_props.loc[:, input_columns].apply(
    lambda row: calculate_expected_value(row.player, next_game_str, row.type, row.line, row.price),
    axis=1
)

Players with +EV

In [440]:
player_props.loc[player_props['EV'] > 0, 'player'].unique()

array(['Aaron Nesmith', 'CJ McCollum', 'Drew Eubanks', 'Fred VanVleet',
       'Jerami Grant', 'Joel Embiid', 'Josh Green', 'Kevon Looney',
       'Malaki Branham', 'Nikola Jokic', 'Paolo Banchero',
       'Pascal Siakam', 'Zach Collins', 'Jakob Poeltl', 'Grant Jerami',
       'James Harden', 'Luguentz Dort', 'Shai Gilgeous-Alexander',
       'Jalen Duren', 'Jeremy Sochan', 'Brandon Ingram'], dtype=object)

Search by player

In [460]:
player = "Brandon Ingram"
player_props.loc[player_props['player'] == player]

Unnamed: 0,date,home,away,player,type,line,price,book,last_updated,EV
8,2023-02-24 00:40:00+00:00,Toronto Raptors,New Orleans Pelicans,Brandon Ingram,Over,4.5,-160,BetMGM,2023-02-23T00:41:10Z,-0.1
9,2023-02-24 00:40:00+00:00,Toronto Raptors,New Orleans Pelicans,Brandon Ingram,Under,4.5,120,BetMGM,2023-02-23T00:41:10Z,-44.5
166,2023-02-24 00:40:00+00:00,Toronto Raptors,New Orleans Pelicans,Brandon Ingram,Over,4.5,-156,BetOnline.ag,2023-02-23T00:40:06Z,-0.4
167,2023-02-24 00:40:00+00:00,Toronto Raptors,New Orleans Pelicans,Brandon Ingram,Under,4.5,121,BetOnline.ag,2023-02-23T00:40:06Z,-44.6
302,2023-02-24 00:40:00+00:00,Toronto Raptors,New Orleans Pelicans,Brandon Ingram,Over,5.5,120,Bovada,2023-02-23T00:39:23Z,-35.2
303,2023-02-24 00:40:00+00:00,Toronto Raptors,New Orleans Pelicans,Brandon Ingram,Under,5.5,-160,Bovada,2023-02-23T00:39:23Z,-10.4
422,2023-02-24 00:40:00+00:00,Toronto Raptors,New Orleans Pelicans,Brandon Ingram,Over,4.5,-155,DraftKings,2023-02-23T00:41:27Z,-0.5
423,2023-02-24 00:40:00+00:00,Toronto Raptors,New Orleans Pelicans,Brandon Ingram,Under,4.5,125,DraftKings,2023-02-23T00:41:27Z,-44.9
604,2023-02-24 00:40:00+00:00,Toronto Raptors,New Orleans Pelicans,Brandon Ingram,Over,5.5,122,FanDuel,2023-02-23T00:38:31Z,-35.4
605,2023-02-24 00:40:00+00:00,Toronto Raptors,New Orleans Pelicans,Brandon Ingram,Under,5.5,-154,FanDuel,2023-02-23T00:38:31Z,-10.9
