In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, LinearRegression, Lasso
from sklearn.preprocessing import StandardScaler 
from datetime import datetime, timedelta
from sklearn.model_selection import GridSearchCV
import sklearn.metrics
import warnings

warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('matchup_stats.csv')
prev_matchups = df[df['pts_d'] != 0]

column_names = df.columns.values[2:24]

y = prev_matchups['pts_d'].to_numpy()
X = prev_matchups.to_numpy()[:,2:24]


In [3]:
scaler = StandardScaler() 
scaled_X = scaler.fit_transform(X) 
  
X_train, X_test, y_train, y_test = train_test_split(scaled_X, 
                                                    y, 
                                                    test_size = 0.2) 


In [4]:
RidgeRegression = Ridge()
hyperParameters = {'alpha':[1e-15,1e-10,1e-8,1e-3,1e-2,1,5,10,20,30,35,40,45,50,55,100]}
ridgeRegressor = GridSearchCV(RidgeRegression, hyperParameters, scoring='r2', cv=5)
ridgeRegressor.fit(X_train,y_train)

# print("Best value for lambda : ",ridgeRegressor.best_params_)
# print("Best score for cost function: ", ridgeRegressor.best_score_)

rdg=ridgeRegressor.best_estimator_
rdg_score=rdg.score(X_test, y_test)
# print("Model score: ", rdg_score, "\n")

for i in range(0, 22):
    print(column_names[i], ": ", rdg.coef_[i])

fg_per_g :  0.07716762498923574
fga_per_g :  -2.218903669797598
fg_pct :  1.4411613379407928
fg3_per_g :  0.6327348209728226
fg3a_per_g :  -0.563228450045541
fg3_pct :  1.2120045702635873
fg2_per_g :  -0.3611607246033381
fg2a_per_g :  -0.6974892473023745
fg2_pct :  1.3891592166283941
ft_per_g :  -0.1435844472235165
fta_per_g :  0.49029445235114816
ft_pct :  0.6538286969049744
orb_per_g :  1.948533944253644
drb_per_g :  1.2579186725498928
trb_per_g :  1.7673197889837684
ast_per_g :  1.25214624146937
stl_per_g :  3.2061774984093643
blk_per_g :  0.3013487261544057
tov_per_g :  -2.7580428177612726
pf_per_g :  0.4927806315544317
pts_per_g :  0.03259470988325738
year :  0.4071431033167805


In [5]:
LassoRegression = Lasso(tol=1e-2, max_iter=10000)
hyperParameters = {'alpha':[1e-15,1e-10,1e-8,1e-3,1e-2,1,5,10,20,30,35,40,45,50,55,100]
                   }
LassoRegressor = GridSearchCV(LassoRegression, hyperParameters, scoring='r2', cv=5)
LassoRegressor.fit(X_train, y_train)

# print("Best value for lambda : ",LassoRegressor.best_params_)
# print("Best score for cost function: ", LassoRegressor.best_score_)

lso=LassoRegressor.best_estimator_
lso_score=lso.score(X_test, y_test)
# print("Model score: ", lso_score, "\n")

for i in range(0, 22):
    print(column_names[i], ": ", lso.coef_[i])

fg_per_g :  3.3500731874828915
fga_per_g :  -3.0775217255068403
fg_pct :  2.545483657808303
fg3_per_g :  2.8103433951920223
fg3a_per_g :  -0.0
fg3_pct :  0.5347132580204297
fg2_per_g :  -0.0
fg2a_per_g :  -0.0
fg2_pct :  0.2050837786305268
ft_per_g :  0.861408372283345
fta_per_g :  0.9635416762537898
ft_pct :  0.8408345956392231
orb_per_g :  3.2888500732844426
drb_per_g :  3.0065012943421485
trb_per_g :  0.0
ast_per_g :  1.2957409563645035
stl_per_g :  3.3043960508156225
blk_per_g :  0.3233072042222047
tov_per_g :  -3.016716966639669
pf_per_g :  0.7226272609909359
pts_per_g :  -3.9888132167003874
year :  0.4113348117428575


In [6]:
lin = LinearRegression() 
lin.fit(X_train, y_train) 
y_pred = lin.predict(X_test)
lin_score = lin.score(X_test, y_test) 
# print("Model score : ", lin_score, "\n")

for i in range(0, 22):
    print(column_names[i], ": ", lin.coef_[i])

fg_per_g :  -4.632460468325213
fga_per_g :  16.08859828553104
fg_pct :  3.4614703302470486
fg3_per_g :  20.978382207381998
fg3a_per_g :  -33.53976760798667
fg3_pct :  1.170587393351606
fg2_per_g :  9.637624700633872
fg2a_per_g :  -28.67899187060708
fg2_pct :  8.09757805144973
ft_per_g :  -6.034142229368679
fta_per_g :  13.153266362036213
ft_pct :  5.354034605578998
orb_per_g :  17.300243356865877
drb_per_g :  20.77704183870123
trb_per_g :  -21.430665459862233
ast_per_g :  0.7924309488730793
stl_per_g :  3.402198879855665
blk_per_g :  0.14498701665218416
tov_per_g :  -2.69457371185605
pf_per_g :  0.6655811695475902
pts_per_g :  -17.847628098623588
year :  0.42161497775341666


In [7]:
def isWin(team_a, team_b, pts_d):
    if pts_d > 0:
        outp = team_a + " beats " + team_b + " by " + str(round(pts_d, 1)) + "points" 
    else:
        outp = team_b + " beats " + team_a + " by " + str(round(abs(pts_d), 1)) + "points"
    return(outp)

In [19]:
models = [lin, rdg, lso]
s = '-'*50

today_dt=datetime.today()
today = today_dt.strftime("%a, %b %d, %Y")


for _ in range(0, 7):
    today_dt += timedelta(days=1)
    today = today_dt.strftime("%a, %b %d, %Y")
    home_teams=df[df['date']==today]['team_a'].to_list()
    visitor_teams=df[df['date']==today]['team_b'].to_list()
    
    print(s, '\n', today, "\n", s)

    try:
        X_today=df[df['date']==today].drop(['pts_d', 'mp_per_g', 'date', 'team_a', 'team_b'], axis=1).to_numpy()[:,1:]
        X_today_scaled = scaler.transform(X_today)
        for model in models:
            print("Model Name: ", model, "\nModel Score: ", round(model.score(X_test, y_test), 3))
            for j in range(0, len(home_teams)):
                print("    ", isWin(home_teams[j], visitor_teams[j], model.predict(X_today_scaled)[j]))
            print("\n")
            
    except:
        print("No games on ", today)

IndentationError: unexpected indent (951319774.py, line 13)