# Rebounds

In [1]:
import os
import pandas as pd
import numpy as np
import scipy.stats as stats
from unidecode import unidecode
from datetime import date

In [2]:
os.chdir('/home/tylerengland/NBA')
os.getcwd()

'/home/tylerengland/NBA'

### Load Data

In [3]:
df = pd.read_csv('backend/data/details/game_details.csv')

In [4]:
df.head()

Unnamed: 0,date,visitor,home,team,starter,player,mp,fg,fga,fg_perc,...,drb,trb,ast,stl,blk,tov,pf,pts,plus_minus,season
0,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,1.0,Kirk Hinrich,34:38:00,10.0,18.0,0.556,...,4.0,4.0,3.0,2.0,0.0,0.0,2.0,26.0,23.0,
1,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,1.0,Ben Wallace,28:29:00,2.0,5.0,0.4,...,5.0,11.0,1.0,0.0,1.0,0.0,2.0,5.0,13.0,
2,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,1.0,Luol Deng,24:07:00,4.0,9.0,0.444,...,1.0,2.0,1.0,1.0,0.0,3.0,2.0,12.0,8.0,
3,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,1.0,Ben Gordon,22:56,1.0,9.0,0.111,...,1.0,1.0,2.0,1.0,0.0,2.0,3.0,6.0,7.0,
4,"Tue, Oct 31, 2006",Chicago Bulls,Miami Heat,0,1.0,P.J. Brown,18:23,1.0,2.0,0.5,...,3.0,3.0,1.0,1.0,1.0,2.0,3.0,4.0,15.0,


In [5]:
df.columns

Index(['date', 'visitor', 'home', 'team', 'starter', 'player', 'mp', 'fg',
       'fga', 'fg_perc', '3p', '3pa', '3p_perc', 'ft', 'fta', 'ft_perc', 'orb',
       'drb', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts', 'plus_minus',
       'season'],
      dtype='object')

Drop duplicates and NA's

In [6]:
df.drop_duplicates(subset=['date', 'visitor', 'home', 'team', 'player'], inplace=True)

In [7]:
df.dropna(subset = ['player'], inplace = True)

In [8]:
df.isna().sum()

date               0
visitor            0
home               0
team               0
starter            0
player             0
mp                 0
fg                 0
fga                0
fg_perc        22819
3p                 0
3pa                0
3p_perc       157057
ft                 0
fta                0
ft_perc       189479
orb                0
drb                0
trb                0
ast                0
stl                0
blk                0
tov                0
pf                 0
pts                0
plus_minus        80
season        532543
dtype: int64

Clean data

In [9]:
df['fg_perc'] = df['fg'] / df['fga']
df['3p_perc'] = df['3p'] / df['3pa']
df['ft_perc'] = df['ft'] / df['fta']
df['min'] = df['mp'].apply(lambda x: int(x.split(':')[0]))
df['sec'] = df['mp'].apply(lambda x: int(x.split(':')[-1]))
df['pt'] = df['min'] + df['sec'] / 60
df['date'] = pd.to_datetime(df['date'])
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['player'] = df['player'].apply(lambda x: unidecode(x.strip().lower()))

In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 539268 entries, 0 to 539883
Data columns (total 32 columns):
 #   Column      Non-Null Count   Dtype         
---  ------      --------------   -----         
 0   date        539268 non-null  datetime64[ns]
 1   visitor     539268 non-null  object        
 2   home        539268 non-null  object        
 3   team        539268 non-null  int64         
 4   starter     539268 non-null  float64       
 5   player      539268 non-null  object        
 6   mp          539268 non-null  object        
 7   fg          539268 non-null  float64       
 8   fga         539268 non-null  float64       
 9   fg_perc     425652 non-null  float64       
 10  3p          539268 non-null  float64       
 11  3pa         539268 non-null  float64       
 12  3p_perc     291414 non-null  float64       
 13  ft          539268 non-null  float64       
 14  fta         539268 non-null  float64       
 15  ft_perc     258992 non-null  float64       
 16  or

Feature Engineer

In [11]:
x = df.groupby(['date', 'visitor', 'home', 'team'])[['fga', 'orb', 'tov', 'fta']].transform('sum')
df['possessions'] = x.fga - x.orb + x.tov + 0.4 * x.fta

In [12]:
def get_season(month, year):
    if 10 <= month <= 12:
        return year
    else:
        return year - 1

In [13]:
df['season'] = df.apply(lambda x: get_season(x.month, x.year), 1)
df['opponent'] = np.where(df['team'], df['visitor'], df['home'])

Merge Schedule

In [14]:
schedules = pd.read_csv("backend/data/schedules/2022.csv")
schedules['date'] = pd.to_datetime(schedules['date'])

home_schedule = schedules.copy()
home_schedule['team'] = 1
home_schedule['opponent'] = home_schedule['visitor']

visitor_schedule = schedules.copy()
visitor_schedule['team'] = 0
visitor_schedule['opponent'] = visitor_schedule['home']

schedules = pd.concat([home_schedule, visitor_schedule])

df = pd.merge(schedules, df, left_on = ['date', 'visitor', 'home', 'team', 'opponent'], right_on = ['date', 'visitor', 'home', 'team', 'opponent'], how = 'left')

Teams Defense

In [15]:
df.columns

Index(['date', 'visitor', 'home', 'season_x', 'team', 'opponent', 'starter',
       'player', 'mp', 'fg', 'fga', 'fg_perc', '3p', '3pa', '3p_perc', 'ft',
       'fta', 'ft_perc', 'orb', 'drb', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf',
       'pts', 'plus_minus', 'season_y', 'min', 'sec', 'pt', 'year', 'month',
       'possessions'],
      dtype='object')

In [16]:
df['defense'] = np.where(df['team'], df['visitor'], df['home'])
defense_df = df.groupby(['date', 'defense'])[['3p']].sum().reset_index()

Feature engineer for defense

In [17]:
defense_df['date'] = pd.to_datetime(defense_df['date'])
defense_df['year'] = defense_df['date'].dt.year
defense_df['month'] = defense_df['date'].dt.month
defense_df['season'] = defense_df.apply(lambda x: get_season(x.month, x.year), 1)

Cumulative moving average for teams 

In [18]:
defense_df['3pt'] = defense_df.groupby(['season', 'defense'])['3p'].shift(1)
defense_df['3pt_sum'] = defense_df.groupby(['season', 'defense'])['3pt'].expanding(min_periods=5).sum().sort_index(axis = 0, level = 2).values
defense_df['3pt_count'] = defense_df.groupby(['season', 'defense'])['3pt'].expanding(min_periods=5).count().sort_index(axis = 0, level = 2).values
defense_df['3pt_mean'] = defense_df.groupby(['season', 'defense'])['3pt'].expanding(min_periods=5).mean().sort_index(axis = 0, level = 2).values

League average

In [19]:
lg_sum = defense_df.groupby(['season', 'date'])['3pt'].sum().groupby(['season']).shift(1)
lg_sum = lg_sum.groupby(['season']).expanding(min_periods=5).sum()
lg_count = defense_df.groupby(['season', 'date'])['3pt'].count().groupby(['season']).shift(1)
lg_count = lg_count.groupby(['season']).expanding(min_periods=5).sum()
lg_avg = lg_sum / lg_count
lg_avg.index = defense_df.groupby(['season', 'date'])['3pt'].sum().index
lg_avg = lg_avg.reset_index()
defense_df = pd.merge(defense_df, lg_avg, on = ['season', 'date'], how = 'left', suffixes = ('', '_lg_avg'))


Merge in opposing defense

In [20]:
df = pd.merge(
    df, 
    defense_df[['defense', 'date', '3pt_mean', '3pt_lg_avg']], 
    left_on = ['opponent', 'date'], 
    right_on = ['defense', 'date'], 
    how = 'left', 
    suffixes = ('', '_opp')
)

Player Analysis

In [21]:
def player_analysis(player, date, df=df):
    # Normalize player name
    player = unidecode(player.strip().lower())

    # Edge cases where there a difference in player names
    edge_cases = {
        'bojan bogdanovich': 'bojan bogdanovic', 'caldwell pope kentavious': 'kentavious caldwell-pope',
        'pj tucker': 'p.j. tucker', 'robert williams iii': 'robert williams',
        'wendell carter': 'wendell carter jr.', 'christian james mccollum': 'cj mccollum',
        'grant jerami': 'jerami grant', 'huerter kevin': 'kevin huerter', 'p. j. tucker': 'p.j. tucker',
        'p.j tucker': 'p.j. tucker', 'nicolas claxton': 'nic claxton', 'cam johnson': 'cameron johnson',
        'rj barrett jr.': 'rj barrett', 'jimmy butler iii': 'jimmy butler', 'dorian finney smith': 'dorian finney-smith',
        'mike conley jr.': 'mike conley', 'anderson kyle': 'kyle anderson', 'jabari smith': 'jabari smith jr.', 
        'kj martin': 'kenyon martin jr.', 'jabari smith jr': 'jabari smith jr.', 'martin jr. k.': 'kenyon martin jr.',
        'alfred joel horford reynoso': 'al horford', 'j. tate': "jae'sean tate", 'anthony davis jr.': 'anthony davis',
        'murray dejounte': 'dejounte murray', 'brook robert lopez': 'brook lopez', 'og anunoby jr.': 'og anunoby',
        'marcus morris sr.': 'marcus morris', 'morris marcus': 'marcus morris', 'kelly oubre jr': 'kelly oubre jr.',
        'kelly oubre': 'kelly oubre jr.', 'dennis smith jr': 'dennis smith jr.', 'trendon watford (por)': 'trendon watford',
        'cameron thomas': 'cam thomas', 'durant kevin': 'kevin durant', 'xavier tillman': 'xavier tillman sr.',
        'k. caldwell-pope': 'kentavious caldwell-pope', 'michael porter': 'michael porter jr.', 'trey murphy': 'trey murphy iii',
        'troy brown jr': 'troy brown jr.', 'troy brown': 'troy brown jr.', 'marvin bagley': 'marvin bagley iii',
        'livers isaiah': 'isaiah livers', 'k. middleton': 'khris middleton', 'portis, bobby': 'bobby portis', 'bobby portis jr.': 'bobby portis',
        'pj washington': 'p.j. washington', 'p.j washington': 'p.j. washington', 'jabari smith ii': 'jabari smith jr.', 
        'kevin porter jr': 'kevin porter jr.', 'kevin porter': 'kevin porter jr.', 's. gilgeous-alexander': 'shai gilgeous-alexander', 
        'v. oladipo': 'victor oladipo', 'tim hardaway': 'tim hardaway jr.', 'westbrook russell': 'russell westbrook', 
        'p.j. washington jr.': 'p.j. washington', 'g. antetokounmpo': 'giannis antetokounmpo', 'lyles trey': 'trey lyles',
        'o.g. anunoby': 'og anunoby', 'r. mcgruder': 'rodney mcgruder', 'gary trent': 'gary trent jr.'
    }
    player = edge_cases.get(player, player)
    print(player)
    
    player_df = df.loc[(df['player'] == player) & (df['date'] <= date)].sort_values(by=['date'], ascending=True).copy()

    # Keep relevant team data (addresses trades)
    player_df.loc[:, 'team'] = np.where(player_df['team'] == 1, player_df['home'], player_df['visitor'])
    most_recent_team = player_df.loc[player_df['date'] == player_df['date'].max(), 'team'].values[0]
    player_df = player_df.loc[player_df['team'] == most_recent_team, :]

    # Keep games where player played
    player_df = player_df[player_df['pt'] != '0']

    # Add row to hole next game predictions
    player_df.loc['next_game', :] = None

    # Shift opposing defense and league average down a game
    player_df.loc[:, '3pt_mean'] = player_df.loc[:, '3pt_mean'].shift(1)
    player_df.loc[:, '3pt_lg_avg'] = player_df.loc[:, '3pt_lg_avg'].shift(1)

    # Cumulative moving average for threes
    player_df['3p'] = player_df['3p'].shift(1) 
    player_df['3p_sum'] = player_df['3p'].expanding(min_periods=5).sum().values
    player_df['3p_count'] = player_df['3p'].expanding(min_periods=5).count().values
    player_df['3p_mean'] = player_df['3p'].expanding(min_periods=5).mean().values

    return player_df


Normalize for opposing defense

In [22]:
def normalize(player_df):
    # Select next game
    mu_df = player_df.iloc[-1, :][['3p_mean', '3pt_mean', '3pt_lg_avg']]

    # # Normalize data
    normalize = 1 + (mu_df['3pt_mean'] - mu_df['3pt_lg_avg']) / mu_df['3pt_lg_avg']
    mu = mu_df['3p_mean'] * normalize

    return mu

Function to calculate implied odds

In [23]:
def convert_odds_to_perc(price):
    if price < 0:
        return abs(price) / (100 + abs(price))
    else:
        return 100 / (100 + price)

Function to calculate EV


In [24]:
def expected_value(prob, odds):
    return prob * odds - (1 - prob)

Calculate EV

In [25]:
def calculate_expected_value(projection, type, line, price):
    # Poisson distribution probability
    under = stats.poisson.cdf(k = line, mu = projection)
    over = 1 - under

    # Expected value
    if type == 'Over':
        ev = expected_value(over, convert_odds_to_perc(price))
    else:
        ev = expected_value(under, convert_odds_to_perc(price))
    
    return round(ev * 100, 1)



Load in player prop lines for date

In [26]:
next_game_date = date.today()

In [27]:
home_teams = schedules.loc[schedules['date'] == str(next_game_date), 'home'].to_list()
away_teams = schedules.loc[schedules['date'] == str(next_game_date), 'visitor'].to_list()

In [28]:
player_props = pd.read_csv("backend/data/odds/player_props/3pt.csv")
player_props['last_updated'] = pd.to_datetime(player_props.loc[:, 'last_updated'])
player_props = player_props.loc[
    (player_props['last_updated'].dt.date == next_game_date) & 
    (player_props['home'].isin(home_teams)) & 
    (player_props['away'].isin(away_teams))
]

Calculate EV and projection for each player and book

In [29]:
player_props['projection'] = player_props.apply(
    lambda row: normalize(player_analysis(row.player, str(next_game_date))),
    axis=1
)
player_props['EV'] = player_props.apply(
    lambda row: calculate_expected_value(row.projection, row.type, row.line, row.price),
    axis=1
)

caris levert
caris levert
darius garland
darius garland
dennis smith jr.
dennis smith jr.
evan mobley
evan mobley
gordon hayward
gordon hayward
isaac okoro
isaac okoro
kelly oubre jr.
kelly oubre jr.
lamar stevens
lamar stevens
p.j. washington
p.j. washington
ricky rubio
ricky rubio
terry rozier
terry rozier
darius garland
darius garland
kelly oubre jr.
kelly oubre jr.
isaac okoro
isaac okoro
gordon hayward
gordon hayward
terry rozier
terry rozier
p.j. washington
p.j. washington
caris levert
caris levert
evan mobley
evan mobley
kelly oubre jr.
kelly oubre jr.
dennis smith jr.
dennis smith jr.
caris levert
caris levert
lamar stevens
lamar stevens
isaac okoro
isaac okoro
ricky rubio
ricky rubio
lamar stevens
lamar stevens
isaac okoro
isaac okoro
dennis smith jr.
dennis smith jr.
darius garland
darius garland
ricky rubio
ricky rubio
p.j. washington
p.j. washington
gordon hayward
gordon hayward
evan mobley
evan mobley
kelly oubre jr.
kelly oubre jr.
caris levert
caris levert
terry rozier
t

Players with +EV

In [31]:
pd.set_option('display.max_rows', None)
player_props.loc[player_props['EV'] >= 10, :].sort_values(by=['date', 'home', 'player', 'book']).loc[:, ['home', 'away', 'player', 'type', 'line', 'price', 'projection', 'EV']]

29


Unnamed: 0,home,away,player,type,line,price,projection,EV
41,Charlotte Hornets,Cleveland Cavaliers,Dennis Smith Jr,Under,0.5,-160,0.38247,10.2
55,Charlotte Hornets,Cleveland Cavaliers,Dennis Smith Jr.,Under,0.5,-160,0.38247,10.2
37,Charlotte Hornets,Cleveland Cavaliers,Evan Mobley,Under,0.5,-225,0.293802,26.1
74,Charlotte Hornets,Cleveland Cavaliers,Evan Mobley,Under,0.5,-233,0.293802,26.7
7,Charlotte Hornets,Cleveland Cavaliers,Evan Mobley,Under,0.5,-220,0.293802,25.8
65,Charlotte Hornets,Cleveland Cavaliers,Evan Mobley,Under,0.5,-234,0.293802,26.8
129,Charlotte Hornets,Cleveland Cavaliers,Gordon Hayward,Under,1.5,-205,0.992186,23.5
76,Charlotte Hornets,Cleveland Cavaliers,Gordon Hayward,Under,1.5,-222,0.992186,24.8
109,Charlotte Hornets,Cleveland Cavaliers,Gordon Hayward,Under,1.5,-215,0.992186,24.3
9,Charlotte Hornets,Cleveland Cavaliers,Gordon Hayward,Under,1.5,-205,0.992186,23.5
