In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, LinearRegression, Lasso
from sklearn.preprocessing import StandardScaler 
from datetime import datetime, timedelta
from sklearn.model_selection import GridSearchCV
import sklearn.metrics
import warnings

warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('matchup_stats.csv')
prev_matchups = df[df['pts_d'] != 0]

column_names = df.columns.values[2:24]

y = prev_matchups['pts_d'].to_numpy()
X = prev_matchups.to_numpy()[:,2:24]


In [3]:
scaler = StandardScaler() 
scaled_X = scaler.fit_transform(X) 
  
X_train, X_test, y_train, y_test = train_test_split(scaled_X, 
                                                    y, 
                                                    test_size = 0.2) 


In [4]:
RidgeRegression = Ridge()
hyperParameters = {'alpha':[1e-15,1e-10,1e-8,1e-3,1e-2,1,5,10,20,30,35,40,45,50,55,100]}
ridgeRegressor = GridSearchCV(RidgeRegression, hyperParameters, scoring='r2', cv=5)
ridgeRegressor.fit(X_train,y_train)

# print("Best value for lambda : ",ridgeRegressor.best_params_)
# print("Best score for cost function: ", ridgeRegressor.best_score_)

rdg=ridgeRegressor.best_estimator_
rdg_score=rdg.score(X_test, y_test)
# print("Model score: ", rdg_score, "\n")

# for i in range(0, 22):
#     print(column_names[i], ": ", rdg.coef_[i])

In [5]:
LassoRegression = Lasso(tol=1e-2, max_iter=10000)
hyperParameters = {'alpha':[1e-15,1e-10,1e-8,1e-3,1e-2,1,5,10,20,30,35,40,45,50,55,100]
                   }
LassoRegressor = GridSearchCV(LassoRegression, hyperParameters, scoring='r2', cv=5)
LassoRegressor.fit(X_train, y_train)

# print("Best value for lambda : ",LassoRegressor.best_params_)
# print("Best score for cost function: ", LassoRegressor.best_score_)

lso=LassoRegressor.best_estimator_
lso_score=lso.score(X_test, y_test)
# print("Model score: ", lso_score, "\n")

# for i in range(0, 22):
#     print(column_names[i], ": ", lso.coef_[i])

In [6]:
lin = LinearRegression() 
lin.fit(X_train, y_train) 
y_pred = lin.predict(X_test)
lin_score = lin.score(X_test, y_test) 
# print("Model score : ", lin_score, "\n")

# for i in range(0, 22):
#     print(column_names[i], ": ", lin.coef_[i])

In [7]:
def isWin(team_a, team_b, pts_d):
    if pts_d > 0:
        outp = team_a + " beats " + team_b + " by " + str(round(pts_d, 1)) + "points" 
    else:
        outp = team_b + " beats " + team_a + " by " + str(round(abs(pts_d), 1)) + "points"
    return(outp)

In [19]:
models = [lin, rdg, lso]
s = '-'*50

today_dt=datetime.today()
today = today_dt.strftime("%a, %b %d, %Y")


for _ in range(0, 7):
    today_dt += timedelta(days=1)
    today = today_dt.strftime("%a, %b %d, %Y")
    home_teams=df[df['date']==today]['team_a'].to_list()
    visitor_teams=df[df['date']==today]['team_b'].to_list()
    
    print(s, '\n', today, "\n", s)

    try:
        X_today=df[df['date']==today].drop(['pts_d', 'mp_per_g', 'date', 'team_a', 'team_b'], axis=1).to_numpy()[:,1:]
        X_today_scaled = scaler.transform(X_today)
        for model in models:
            print("Model Name: ", model, "\nModel Score: ", round(model.score(X_test, y_test), 3))
            for j in range(0, len(home_teams)):
                print("    ", isWin(home_teams[j], visitor_teams[j], model.predict(X_today_scaled)[j]))
            print("\n")
            
    except:
        print("No games on ", today)

IndentationError: unexpected indent (951319774.py, line 13)