In [3]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

In [4]:
# Read the per-team, per-season summary table from the working directory.
team_data = pd.read_csv(filepath_or_buffer="Team Summaries.csv")

In [5]:
# Feature columns fed to every model trained in this notebook.
FEATURE_COLUMNS = [
    "mov", "sos", "srs", "o_rtg", "d_rtg", "n_rtg", "pace", "f_tr", "x3p_ar",
    "ts_percent", "e_fg_percent", "tov_percent", "orb_percent", "ft_fga",
    "opp_e_fg_percent", "opp_tov_percent", "opp_drb_percent", "opp_ft_fga",
]

# Keep seasons from 1980 onward and drop the 'League Average' summary rows.
team_data = team_data[team_data['season'] >= 1980]
team_data = team_data[team_data['team'] != 'League Average'].reset_index(drop=True)

X_raw = team_data[FEATURE_COLUMNS].values
y = team_data["playoffs"].values

# Split BEFORE scaling so the held-out rows never influence the scaler's
# mean/std (fitting the scaler on all rows first leaks test-set statistics
# into the training features). The split depends only on the number of rows
# and random_state, so the same rows land in train/test as before.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_raw, y, test_size=0.3, random_state=42
)
split_scaler = preprocessing.StandardScaler().fit(X_train_raw)
X_train = split_scaler.transform(X_train_raw)
X_test = split_scaler.transform(X_test_raw)

# X remains standardized over every kept row: the final model is refit on the
# full (X, y) further down, where no held-out evaluation takes place.
X = preprocessing.StandardScaler().fit_transform(X_raw)

In [6]:
# Features, in the same order used to build the training matrix X.
feature_columns = [
    "mov", "sos", "srs", "o_rtg", "d_rtg", "n_rtg", "pace", "f_tr", "x3p_ar",
    "ts_percent", "e_fg_percent", "tov_percent", "orb_percent", "ft_fga",
    "opp_e_fg_percent", "opp_tov_percent", "opp_drb_percent", "opp_ft_fga",
]

# Rows of the season to be scored.
test_data = team_data[team_data['season'] == 2023]

# Team names as an (n, 1) column so they can be stacked next to the
# probabilities later. to_numpy(dtype=object) always yields a plain object
# ndarray, whatever pandas dtype the column has.
team_names = np.array(test_data["team"].to_numpy(dtype=object).reshape(-1, 1))

# Standardize with the statistics of the full filtered table, NOT of the 2023
# slice alone. team_data was already filtered in the preceding cell and X is
# standardized over all of its rows, so fitting on those same rows reproduces
# the scaling the final model is trained on. A scaler fit only on the 2023
# rows would put these features on a different scale and skew the
# predicted probabilities.
training_scaler = preprocessing.StandardScaler().fit(team_data[feature_columns].values)
test_data = training_scaler.transform(test_data[feature_columns].values)

In [7]:
# Logistic regression: fit on the training split, then print the score
# (mean accuracy) on the held-out split.
lr = LogisticRegression(random_state=0)
lr.fit(X_train, y_train)
lr_accuracy = lr.score(X_test, y_test)
print(lr_accuracy)

0.9211956521739131


In [8]:
# Gaussian naive Bayes: fit on the training split, print held-out accuracy.
nb = GaussianNB()
nb.fit(X_train, y_train)
nb_accuracy = nb.score(X_test, y_test)
print(nb_accuracy)

0.9157608695652174


In [9]:
# Random forest (trees capped at depth 10, fixed seed): fit on the training
# split, print held-out accuracy.
rf = RandomForestClassifier(max_depth=10, random_state=0)
rf.fit(X_train, y_train)
rf_accuracy = rf.score(X_test, y_test)
print(rf_accuracy)

0.9184782608695652


In [13]:
# Multi-layer perceptron with a generous iteration cap (max_iter=10000) and a
# fixed seed: fit on the training split, print held-out accuracy.
mlp = MLPClassifier(random_state=42, max_iter=10000)
mlp.fit(X_train, y_train)
mlp_accuracy = mlp.score(X_test, y_test)
print(mlp_accuracy)

0.8804347826086957


In [14]:
# k-nearest neighbours with default settings: fit on the training split,
# print held-out accuracy.
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
knn_accuracy = knn.score(X_test, y_test)
print(knn_accuracy)

0.8722826086956522


In [15]:
# Support vector classifier with default settings: fit on the training split,
# print held-out accuracy.
svc = SVC()
svc.fit(X_train, y_train)
svc_accuracy = svc.score(X_test, y_test)
print(svc_accuracy)

0.8913043478260869


In [16]:
# Final model: logistic regression refit on the complete (X, y) rather than
# on the training split only.
lr_final = LogisticRegression(random_state=0)
lr_final.fit(X, y)

# One row of class probabilities per team in test_data.
predictions = lr_final.predict_proba(test_data)

# Keep the second probability column as an (n, 1) column vector.
# NOTE(review): presumably this is the "made the playoffs" class - confirm
# against lr_final.classes_.
p_arr = predictions[:, 1:2]

In [28]:
# Build the per-team results table and save it as CSV.
#
# Names and probabilities are coerced to explicit dtypes before being
# combined: stacking strings and floats into one array only works while the
# names are object dtype; with a fixed-width string array the floats would be
# turned into strings and the later "* 100" would repeat text instead of
# scaling numbers.
name_column = np.asarray(team_names, dtype=object).reshape(-1, 1)
probability_column = np.asarray(p_arr, dtype=float).reshape(-1, 1)

# Same (n, 2) object array of [team name, probability] rows as before.
result = np.hstack((name_column, probability_column))

df = pd.DataFrame({
    'Team Name': name_column.ravel(),
    # Express the probability as a percentage rounded to 3 decimals.
    'p(Playoffs)': (probability_column.ravel() * 100).round(3),
})

# index=False is the documented way to omit the row index from the file.
df.to_csv('Team Results.csv', index=False)
