### Import packages

In [13]:
import os
import pandas as pd
import numpy as np

# Show every column when a DataFrame is displayed (no truncation of wide frames)
pd.options.display.max_columns = None

### Set working directory

In [14]:
# Project root that the relative 'backend/...' paths below are resolved against.
# The NBA_PROJECT_DIR environment variable overrides it so the notebook can run on
# another machine; when it is unset the original hard-coded location is used.
DEFAULT_PROJECT_DIR = '/Users/tyler/OneDrive/Documents/Python/NBA'
project_dir = os.environ.get('NBA_PROJECT_DIR', DEFAULT_PROJECT_DIR)

# Print working directory (before the change)
cwd = os.getcwd()
print(f'Directory: {cwd}')

# Change working directory
os.chdir(project_dir)

# Print working directory (after the change)
cwd = os.getcwd()
print(f'Directory: {cwd}')

Directory: C:\Users\tyler\OneDrive\Documents\Python\NBA
Directory: C:\Users\tyler\OneDrive\Documents\Python\NBA


## Exploratory Data Analysis

### Import data

In [15]:
# Columns that together identify one game in both CSV files
game_keys = ['date', 'visitor', 'home']

# Load the game totals (only the game keys and '3p' are kept) and the model predictions.
# The 'Unnamed: 0' column present in both files is dropped.
totals_df = pd.read_csv('backend/data/totals/game_totals.csv').drop(['Unnamed: 0'], axis=1)[game_keys + ['3p']]
predict_df = pd.read_csv('backend/predictions/3p_predictions.csv').drop(['Unnamed: 0'], axis=1)

# Convert date to datetime
totals_df['date'] = pd.to_datetime(totals_df['date'])
predict_df['date'] = pd.to_datetime(predict_df['date'])

# Total 3pt by both teams
totals_df = totals_df.groupby(game_keys).sum()

# Merge dataframes (left join keeps every predicted game; the keys on the right
# side are the index levels produced by the groupby above)
df = pd.merge(predict_df, totals_df, on=game_keys, how='left')
df = df.drop_duplicates(game_keys).dropna(axis=0)

# Separate current games from past games: rows whose summed '3p' is 0 go to
# today_df, rows with a positive '3p' stay in df.
# Both frames get new columns assigned in later cells, so take explicit copies
# instead of keeping boolean-filtered slices of the merged frame.
today_df = df[df['3p'] == 0].copy()
df = df[df['3p'] > 0].copy()

df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 137 entries, 0 to 143
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   date              137 non-null    datetime64[ns]
 1   visitor           137 non-null    object        
 2   home              137 non-null    object        
 3   linear            137 non-null    int64         
 4   decision_tree     137 non-null    int64         
 5   gradient_boosted  137 non-null    int64         
 6   neural_network    137 non-null    int64         
 7   random_forest     137 non-null    int64         
 8   line              137 non-null    float64       
 9   avg               137 non-null    float64       
 10  over              137 non-null    float64       
 11  under             137 non-null    float64       
 12  3p                137 non-null    float64       
dtypes: datetime64[ns](1), float64(5), int64(5), object(2)
memory usage: 15.0+ KB


In [16]:
def myround(x, base=.5):
    """Round ``x`` to the nearest multiple of ``base``.

    ``x`` is divided by ``base``, passed through the built-in ``round`` and
    scaled back up, so any object that supports division and ``round()``
    works (the notebook calls this with a pandas Series of differences).

    Parameters
    ----------
    x : number or array-like
        Value(s) to round.
    base : number, default 0.5
        Step size of the rounding grid.

    Returns
    -------
    Same kind as ``x``: the closest multiple of ``base``.
    """
    steps = round(x / base)
    return base * steps

In [17]:
models = ['linear', 'decision_tree', 'gradient_boosted', 'neural_network', 'random_forest', 'avg']
for model in models:
    # Side the model takes: over when its prediction is above the line, under otherwise.
    # This single mask drives the pick label, the hit flag and the odds used below,
    # so a prediction exactly on the line is treated as an under pick everywhere.
    picked_over = df[model] > df['line']
    df[model + '_predict'] = np.where(picked_over, 'o', 'u')

    # Compute if model predicted correctly.
    # A game whose '3p' total lands exactly on the line satisfies neither
    # comparison and is therefore scored as a miss.
    went_over = df['3p'] > df['line']
    went_under = df['3p'] < df['line']
    df[model + '_hit'] = (picked_over & went_over) | (~picked_over & went_under)

    # Compute models profit per bet.
    # First select the odds of the side that was picked ...
    odds = np.where(picked_over, df['over'], df['under'])

    # ... then convert them to profit per unit staked:
    # positive odds pay odds/100, negative odds pay 100/|odds|.
    df[model + '_potential_profit'] = np.where(odds > 0, odds / 100, -100 / odds)

    # A winning pick earns the potential profit, a losing pick costs one unit
    df[model + '_profit'] = np.where(df[model + '_hit'], df[model + '_potential_profit'], -1)

    # Compute difference between line and prediction (rounded by myround)
    df[model + '_diff'] = myround(df[model] - df['line'])

df.tail()

Unnamed: 0,date,visitor,home,linear,decision_tree,gradient_boosted,neural_network,random_forest,line,avg,over,under,3p,linear_predict,linear_hit,linear_potential_profit,linear_profit,linear_diff,decision_tree_predict,decision_tree_hit,decision_tree_potential_profit,decision_tree_profit,decision_tree_diff,gradient_boosted_predict,gradient_boosted_hit,gradient_boosted_potential_profit,gradient_boosted_profit,gradient_boosted_diff,neural_network_predict,neural_network_hit,neural_network_potential_profit,neural_network_profit,neural_network_diff,random_forest_predict,random_forest_hit,random_forest_potential_profit,random_forest_profit,random_forest_diff,avg_predict,avg_hit,avg_potential_profit,avg_profit,avg_diff
139,2022-03-14,Milwaukee Bucks,Utah Jazz,29,31,29,30,26,29.5,29.0,100.0,-140.0,29.0,u,True,0.714286,0.714286,-0.5,o,False,1.0,-1.0,1.5,u,True,0.714286,0.714286,-0.5,o,False,1.0,-1.0,0.5,u,True,0.714286,0.714286,-3.5,u,True,0.714286,0.714286,-0.5
140,2022-03-14,Minnesota Timberwolves,San Antonio Spurs,26,26,26,26,26,26.5,26.0,110.0,-150.0,30.0,u,False,0.666667,-1.0,-0.5,u,False,0.666667,-1.0,-0.5,u,False,0.666667,-1.0,-0.5,u,False,0.666667,-1.0,-0.5,u,False,0.666667,-1.0,-0.5,u,False,0.666667,-1.0,-0.5
141,2022-03-14,Portland Trail Blazers,Atlanta Hawks,25,25,26,26,26,26.5,25.6,110.0,-150.0,22.0,u,True,0.666667,0.666667,-1.5,u,True,0.666667,0.666667,-1.5,u,True,0.666667,0.666667,-0.5,u,True,0.666667,0.666667,-0.5,u,True,0.666667,0.666667,-0.5,u,True,0.666667,0.666667,-1.0
142,2022-03-14,Toronto Raptors,Los Angeles Lakers,23,22,23,24,23,24.5,23.0,100.0,-140.0,23.0,u,True,0.714286,0.714286,-1.5,u,True,0.714286,0.714286,-2.5,u,True,0.714286,0.714286,-1.5,u,True,0.714286,0.714286,-0.5,u,True,0.714286,0.714286,-1.5,u,True,0.714286,0.714286,-1.5
143,2022-03-14,Washington Wizards,Golden State Warriors,26,26,25,25,26,24.5,25.6,-145.0,105.0,28.0,o,True,0.689655,0.689655,1.5,o,True,0.689655,0.689655,1.5,o,True,0.689655,0.689655,0.5,o,True,0.689655,0.689655,0.5,o,True,0.689655,0.689655,1.5,o,True,0.689655,0.689655,1.0


In [18]:
# Stake per bet, used to scale the per-unit profit into dollars
unit = 100

for model in models:
    hits = df[model + '_hit']

    # Win/loss record of the model
    correct = hits[hits == True].count()
    wrong = hits[hits == False].count()

    # Total profit in units over all bets
    profit = df[model + '_profit'].sum()

    report = [
        model,
        f'\tRecord: {correct} - {wrong}',
        f'\tAccuracy: {round((correct / (correct + wrong)) * 100)}%',
        f'\tProfit: ${round(profit * unit)}  (given a ${unit} unit)\n',
    ]
    print('\n'.join(report))

linear
	Record: 72 - 65
	Accuracy: 53%
	Profit: $-836  (given a $100 unit)

decision_tree
	Record: 78 - 59
	Accuracy: 57%
	Profit: $385  (given a $100 unit)

gradient_boosted
	Record: 77 - 60
	Accuracy: 56%
	Profit: $66  (given a $100 unit)

neural_network
	Record: 75 - 62
	Accuracy: 55%
	Profit: $-150  (given a $100 unit)

random_forest
	Record: 80 - 57
	Accuracy: 58%
	Profit: $713  (given a $100 unit)

avg
	Record: 78 - 59
	Accuracy: 57%
	Profit: $313  (given a $100 unit)



In [19]:
# For every model, group past bets by the rounded prediction-minus-line difference
# and print profit, accuracy (in percent) and number of bets per group.
for model in models:
    hit_col = model + '_hit'
    profit_col = model + '_profit'

    bucket_stats = df.groupby([model + '_diff'])[[hit_col, profit_col]].agg(['sum', 'count'])

    n_hits = bucket_stats[(hit_col, 'sum')]
    n_bets = bucket_stats[(hit_col, 'count')]

    bucket_stats['accuracy'] = (n_hits / n_bets * 100).round()
    bucket_stats['profit'] = bucket_stats[(profit_col, 'sum')].round(2)
    bucket_stats['count'] = n_bets

    print(bucket_stats[['profit', 'accuracy', 'count']])

            profit accuracy count
                                 
linear_diff                      
-2.5         -1.20     33.0     3
-1.5          0.09     60.0    15
-0.5         -0.48     54.0    46
 0.5        -11.96     40.0    45
 1.5          3.69     67.0    21
 2.5          2.50     83.0     6
 3.5         -1.00      0.0     1
                   profit accuracy count
                                        
decision_tree_diff                      
-3.5                -1.00      0.0     1
-2.5                -0.77     50.0     6
-1.5                 6.67     74.0    23
-0.5                -0.62     52.0    40
 0.5                -1.75     51.0    35
 1.5                 3.52     65.0    23
 2.5                -0.78     50.0     6
 3.5                 0.59    100.0     1
 4.5                -1.00      0.0     1
 5.5                -1.00      0.0     1
                      profit accuracy count
                                           
gradient_boosted_diff                  

In [20]:
# Spot check of the rounding helper with its default base
myround(0.3)

0.5

## Make picks

In [21]:
models = ['linear', 'decision_tree', 'gradient_boosted', 'neural_network', 'random_forest', 'avg']

# Work on an independent copy so the column assignments below never act on a
# filtered slice of another frame.
today_df = today_df.copy()

# Placeholder columns, kept so the frame layout matches what this cell has
# always produced. The per-model columns are now named explicitly below.
today_df['profit'] = 0
today_df['accuracy'] = 0
today_df['count'] = 0

for model in models:
    diff_col = model + '_diff'
    hit_col = model + '_hit'
    profit_col = model + '_profit'
    stat_cols = ['accuracy_' + model, 'profit_' + model, 'count_' + model]

    # Differential profit and accuracy: per rounded difference between prediction
    # and line, the hit rate in percent, the summed profit and the number of bets.
    summary = df.groupby([diff_col])[[hit_col, profit_col]].aggregate(['sum', 'count'])
    n_bets = summary[(hit_col, 'count')]
    history = pd.DataFrame({
        stat_cols[0]: (summary[(hit_col, 'sum')] / n_bets * 100).round(),
        stat_cols[1]: summary[(profit_col, 'sum')].round(2),
        stat_cols[2]: n_bets,
    }).rename_axis(diff_col).reset_index()

    # Compute difference between line and prediction
    today_df[diff_col] = myround(today_df[model] - today_df['line'])

    # Remove statistics left over from a previous run of this cell so that
    # re-running it does not create duplicate columns.
    today_df = today_df.drop(columns=stat_cols, errors='ignore')

    # Merge differential's profit and accuracy. A left join keeps every game;
    # differences never seen in the history get NaN statistics.
    today_df = pd.merge(today_df, history, on=diff_col, how='left')

In [22]:
base_models = ['linear', 'decision_tree', 'gradient_boosted', 'neural_network', 'random_forest', 'avg']

# Expected profit: historical profit times historical accuracy of the bucket
# that today's prediction falls into, per model
for name in base_models:
    today_df[name + '_expected_profit'] = today_df['profit_' + name] * today_df['accuracy_' + name]

# From here on `models` holds the expected-profit column names
models = [name + '_expected_profit' for name in base_models]

# Missing values are replaced by 0 before the columns are summed and compared
today_df = today_df.fillna(0)

# Sum of the expected profit over all models, added column by column
running_total = today_df[models[0]]
for column in models[1:]:
    running_total = running_total + today_df[column]
today_df['total_expected_profit'] = running_total

# Find model with max expected profit: take the column name holding the row
# maximum and strip its trailing '_expected_profit' to get the model name
best_column = today_df[models].idxmax(axis=1)
today_df['max_expected_profit'] = best_column.apply(lambda x: x.rsplit('_', 2)[0])

# Predictions for models that result in max expected profit
predictions = [row[row['max_expected_profit']] for _, row in today_df.iterrows()]

today_df['prediction'] = predictions
today_df['over_under'] = np.where(today_df['prediction'] > today_df['line'], 'o', 'u')

In [23]:
models = ['linear', 'decision_tree', 'gradient_boosted', 'neural_network', 'random_forest', 'avg']
expected_profits = [model + '_expected_profit' for model in models]

cols = ['date', 'visitor', 'home', 'line'] + ['total_expected_profit', 'max_expected_profit'] + models + expected_profits

# Build the display table on a copy: the numeric model predictions in today_df
# stay intact, and the cell can be re-run without comparing 'o'/'u' strings
# against the line.
picks_view = today_df[cols].copy()
for model in models:
    # Show each model's pick as over ('o') or under ('u') relative to the line
    picks_view[model] = np.where(today_df[model] > today_df['line'], 'o', 'u')

picks_view.sort_values(['total_expected_profit'], ascending=False)

Unnamed: 0,date,visitor,home,line,total_expected_profit,max_expected_profit,linear,decision_tree,gradient_boosted,neural_network,random_forest,avg,linear_expected_profit,decision_tree_expected_profit,gradient_boosted_expected_profit,neural_network_expected_profit,random_forest_expected_profit,avg_expected_profit
3,2022-03-15,Phoenix Suns,New Orleans Pelicans,21.5,411.06,linear,o,o,o,o,o,o,207.5,59.0,-0.0,74.0,70.56,0.0
2,2022-03-15,Memphis Grizzlies,Indiana Pacers,21.5,294.17,linear,o,o,o,o,o,o,247.23,59.0,-0.0,-90.75,78.69,0.0
1,2022-03-15,Detroit Pistons,Miami Heat,24.5,-112.13,neural_network,u,o,u,u,u,u,5.4,-89.25,-121.5,337.68,-156.96,-87.5
0,2022-03-15,Brooklyn Nets,Orlando Magic,24.5,-444.55,gradient_boosted,o,o,o,o,o,o,-478.4,-89.25,203.4,-128.5,70.56,-22.36


In [24]:
# Keep only the columns stored in the algorithm's prediction log
today_df = today_df[['date', 'visitor', 'home', 'over_under', 'prediction', 'line', 'over', 'under']]

# Load the existing log and add today's picks at the end.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
algo_predictions = pd.read_csv('backend/predictions/3p_predictions_algo.csv').drop(['Unnamed: 0'], axis=1)
algo_predictions = pd.concat([algo_predictions, today_df], ignore_index=True)

# Normalise the dates, then keep one row per game. drop_duplicates keeps the
# first occurrence, so a pick already in the log wins over the new one.
algo_predictions['date'] = pd.to_datetime(algo_predictions['date'])
algo_predictions = algo_predictions.drop_duplicates(['date', 'home', 'visitor'])

# The index is written on purpose: the read above drops the resulting
# 'Unnamed: 0' column and would fail if it were missing.
algo_predictions.to_csv('backend/predictions/3p_predictions_algo.csv')
algo_predictions.tail(10)

Unnamed: 0,date,visitor,home,over_under,prediction,line,over,under
16,2022-03-14,Los Angeles Clippers,Cleveland Cavaliers,u,22.4,24.5,100.0,-140.0
17,2022-03-14,Milwaukee Bucks,Utah Jazz,u,29.0,29.5,100.0,-140.0
18,2022-03-14,Minnesota Timberwolves,San Antonio Spurs,u,26.0,26.5,110.0,-150.0
19,2022-03-14,Portland Trail Blazers,Atlanta Hawks,u,26.0,26.5,110.0,-150.0
20,2022-03-14,Toronto Raptors,Los Angeles Lakers,u,24.0,24.5,100.0,-140.0
21,2022-03-14,Washington Wizards,Golden State Warriors,o,26.0,24.5,-145.0,105.0
22,2022-03-15,Brooklyn Nets,Orlando Magic,o,25.0,24.5,115.0,-160.0
23,2022-03-15,Detroit Pistons,Miami Heat,u,24.0,24.5,-115.0,-120.0
24,2022-03-15,Memphis Grizzlies,Indiana Pacers,o,23.0,21.5,-170.0,120.0
25,2022-03-15,Phoenix Suns,New Orleans Pelicans,o,24.0,21.5,-140.0,105.0
