In [None]:
%matplotlib inline

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
# Single shared linear-regression estimator; it is fitted on the training
# split further down and then reused for scoring and for the 2021 predictions.
model = LinearRegression()

In [None]:
# Read the 2021 season table and split it into the team-name column and the
# four adjusted rating columns used as model inputs.
data_21 = pd.read_csv("Data/2021.csv")
teams = data_21["Team"]
X_21 = data_21.loc[:, ["MOV/A", "ORtg/A", "DRtg/A", "NRtg/A"]]

In [None]:
# Historical team table used for fitting (per the file name, seasons 2000 to 2020).
data = pd.read_csv("Data/2000 to 2020 data.csv")

In [None]:
# Preview the first rows of the historical table.
data.head()

In [None]:
# Feature matrix: the four adjusted rating columns.
X = data.loc[:, ["MOV/A", "ORtg/A", "DRtg/A", "NRtg/A"]]
feature_names = X.columns

# Target: the "W" column, reshaped into a single-column 2-D array.
y = data["W"].to_numpy().reshape(-1, 1)

print(X.shape, y.shape)

In [None]:
# Hold out part of the historical rows for evaluation; the fixed random_state
# makes the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [None]:
# Fit the linear model on the training split.
model.fit(X_train, y_train)

# Score the fitted model on both splits so over/under-fitting is visible.
training_score = model.score(X_train, y_train)
testing_score = model.score(X_test, y_test)

print(
    f"Training Score: {training_score}",
    f"Testing Score: {testing_score}",
    sep="\n",
)

In [None]:
# Residuals (prediction minus actual) plotted against the predicted value,
# one colour per split.
train_pred = model.predict(X_train)
test_pred = model.predict(X_test)

plt.scatter(train_pred, train_pred - y_train, c="blue", label="Training Data")
plt.scatter(test_pred, test_pred - y_test, c="orange", label="Testing Data")
plt.legend()

# Zero-residual reference line spanning the range of the target values.
plt.hlines(y=0, xmin=y.min(), xmax=y.max())
plt.title("Residual Plot")

In [None]:
# Fit a random forest on the same split and report its test-set score; the
# fitted forest also provides the feature importances inspected afterwards.
# NOTE(review): the target is a win count, so RandomForestRegressor may be a
# better match than a classifier - confirm the intent before changing it.
from sklearn.ensemble import RandomForestClassifier

# Seed the forest so the score and the importances are reproducible on re-run.
rf = RandomForestClassifier(n_estimators=200, random_state=42)

# y was reshaped to an (n, 1) column earlier; scikit-learn estimators expect a
# 1-D target and emit a DataConversionWarning otherwise, so flatten it here.
rf = rf.fit(X_train, y_train.ravel())
rf.score(X_test, y_test.ravel())

In [None]:
# Pair each importance with its feature name and list them from most to least important.
sorted(zip(rf.feature_importances_, feature_names), reverse=True)

In [None]:
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt

# Pairwise correlations between the four rating features, drawn as an
# annotated heatmap.
rating_columns = ["MOV/A", "ORtg/A", "DRtg/A", "NRtg/A"]
corrMatrix = data.loc[:, rating_columns].corr()
sn.heatmap(data=corrMatrix, annot=True)
plt.show()

In [None]:
# Predict win totals for the 2021 teams with the fitted linear model.
# The result keeps the (n, 1) shape of the training target.
predictions = model.predict(X_21)
print(f"First 32 Predictions:   {predictions[:32]}")
# The previous version also printed y_test[:32] as "actual labels". Those rows
# come from the held-out part of the historical table, not from the 2021
# teams, so they do not line up with these predictions and were removed.

In [None]:
# Flatten the (n, 1) prediction array and place it next to the team names.
pred = [row[0] for row in predictions]
preds = pd.Series(pred)

# concat leaves the unnamed Series under column label 0; give it a readable name.
predictions_df = pd.concat([teams, preds], axis=1).rename(
    columns={0: "Predicted Wins"}
)
predictions_df


In [None]:
# Project three of the rating columns onto their principal components and
# draw them as a 3-D scatter coloured by the target values in `y`.
X1 = data[["MOV/A", "ORtg/A", "DRtg/A"]]
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # kept: registers the "3d" projection on older matplotlib


from sklearn import decomposition
from sklearn import datasets

np.random.seed(5)

fig = plt.figure(1, figsize=(4, 3))
plt.clf()
# Create the 3-D axes through the figure. Constructing Axes3D(fig, ...)
# directly no longer attaches the axes to the figure on current matplotlib,
# which left the scatter below drawing on a fresh 2-D axes.
ax = fig.add_subplot(111, projection="3d", elev=48, azim=134)
ax.set_position([0, 0, .95, 1])

pca = decomposition.PCA(n_components=3)
pca.fit(X1)
X1 = pca.transform(X1)

# Draw on the 3-D axes explicitly: with plt.scatter the third positional
# array is the marker size unless the current axes happens to be 3-D.
# y is an (n, 1) column, so flatten it to one colour value per point.
ax.scatter(X1[:, 0], X1[:, 1], X1[:, 2], c=y.ravel(), cmap=plt.cm.nipy_spectral,
           edgecolor='k')

# Hide tick labels on all three axes (the w_xaxis/w_yaxis/w_zaxis aliases
# have been removed from matplotlib; xaxis/yaxis/zaxis are the supported names).
ax.xaxis.set_ticklabels([])
ax.yaxis.set_ticklabels([])
ax.zaxis.set_ticklabels([])


In [None]:
# Load the historical MVP table and show up to its first 100 rows.
mvp_hist = pd.read_csv("Data/MVP Data History.csv")
mvp_hist.head(100)

In [None]:
# Train a multi-layer-perceptron classifier on the historical MVP table and
# report its class probabilities, predictions and score on the held-out split.
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Missing values are replaced with 0 before the features are selected.
mvp_hist = mvp_hist.fillna(0)
X_mvp = mvp_hist[["Age", "PTS", "TRB", "AST", "STL", "BLK", "FG%", "3P%", "FT%"]]

# One-hot encode the MVP flag and keep the "Y" indicator as the target.
y_df=pd.get_dummies(mvp_hist["MVP"], drop_first=True)
y_mvp = y_df["Y"].values.reshape(-1,1)
X_mvp_train, X_mvp_test, y_mvp_train, y_mvp_test = train_test_split(X_mvp, y_mvp, random_state=42)

# The target is stored as an (n, 1) column; scikit-learn expects a 1-D target
# and emits a DataConversionWarning otherwise, so flatten it at the call sites.
clf = MLPClassifier(random_state=1, max_iter=300).fit(X_mvp_train, y_mvp_train.ravel())
mvp_prob=clf.predict_proba(X_mvp_test)
mvp_predict=clf.predict(X_mvp_test)
mvp_score=clf.score(X_mvp_test, y_mvp_test.ravel())
print(mvp_prob)
print(mvp_predict)
print(mvp_score)


In [None]:
# Load the current-season player statistics and preview the first rows.
current_year = pd.read_csv("Data/Current_Season_Stats.csv")
current_year.head()

In [None]:
# Same nine feature columns the MVP classifier was trained on, with missing
# values replaced by 0.
clean_stats = current_year[
    ["Age", "PTS", "TRB", "AST", "STL", "BLK", "FG%", "3P%", "FT%"]
].fillna(0)

In [None]:
# Class prediction for every current-season player.
# NOTE(review): `clf` is rebound to the rookie model later in the notebook, so
# this cell relies on being run before that one.
mvp_21_prediction = clf.predict(clean_stats)
print(mvp_21_prediction)

In [None]:
# Per-class probabilities for the current-season players.
mvp_prob = clf.predict_proba(clean_stats)
print(mvp_prob)

# Split the two probability columns into separate lists.
# Column 1 is presumably the "Y" (MVP) class - confirm against clf.classes_.
n_probability = [row[0] for row in mvp_prob]
y_probability = [row[1] for row in mvp_prob]

# One row per player: name, predicted class and probability from column 1.
players = current_year["Player"].tolist()
df = pd.DataFrame(
    {
        "Player": players,
        "prediction": mvp_21_prediction,
        "mvp prob": y_probability,
    }
)
df

In [None]:
# Rank players by probability, express it as a percentage rounded to two
# decimals, save the table and display it.
# NOTE: the scaling is applied to `df` itself, so running this cell twice
# multiplies the column by 100 again; rebuild `df` before re-running.
df=df.sort_values(by=["mvp prob"],ascending=False)
df["mvp prob"] = df["mvp prob"].map(lambda x: round(x * 100, 2))
df.to_csv("Data/MVP_prediction.csv")
# Only a cell's final expression is rendered; the bare `df` used to sit before
# to_csv() and was therefore never shown.
df

In [None]:
# Load the historical rookie-of-the-year table and preview the first rows.
roy_hist = pd.read_csv("Data/Rookies of the year.csv")
roy_hist.head()

In [None]:
# Train a multi-layer-perceptron classifier on the historical rookie table and
# report its class probabilities, predictions and score on the held-out split.
# Note that this rebinds `clf`, replacing whatever model it held before.
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Missing values are replaced with 0 before the features are selected.
roy_hist = roy_hist.fillna(0)
X_roy = roy_hist[["MP", "PTS", "TRB", "AST"]]

# One-hot encode the ROY flag and keep the "Y" indicator as the target.
roy_y_df=pd.get_dummies(roy_hist["ROY"], drop_first=True)
y_roy = roy_y_df["Y"].values.reshape(-1,1)
X_roy_train, X_roy_test, y_roy_train, y_roy_test = train_test_split(X_roy, y_roy, random_state=42)

# The target is stored as an (n, 1) column; scikit-learn expects a 1-D target
# and emits a DataConversionWarning otherwise, so flatten it at the call sites.
clf = MLPClassifier(random_state=1, max_iter=300).fit(X_roy_train, y_roy_train.ravel())
roy_prob=clf.predict_proba(X_roy_test)
roy_predict=clf.predict(X_roy_test)
roy_score=clf.score(X_roy_test, y_roy_test.ravel())
print(roy_prob)
print(roy_predict)
print(roy_score)

In [None]:
# Load the current rookie statistics and preview the first rows.
current_rookies=pd.read_csv("Data/current rookies.csv")
current_rookies.head()

In [None]:
# Same four feature columns the rookie classifier was trained on, with
# missing values replaced by 0.
clean_rookies = current_rookies[["MP", "PTS", "TRB", "AST"]].fillna(0)
clean_rookies

In [None]:
# Class prediction for every current rookie, using whichever model `clf`
# currently refers to (the rookie model when cells are run top to bottom).
roy_21_prediction = clf.predict(clean_rookies)
print(roy_21_prediction)

In [None]:
# Per-class probabilities for the current rookies.
roy_prob = clf.predict_proba(clean_rookies)
print(roy_prob)

# Split the two probability columns into separate lists.
# Column 1 is presumably the "Y" (ROY) class - confirm against clf.classes_.
n_probability = [row[0] for row in roy_prob]
y_probability = [row[1] for row in roy_prob]

# One row per rookie: name, predicted class and probability from column 1.
rookie = current_rookies["Player"].tolist()
roy_df = pd.DataFrame(
    {
        "Player": rookie,
        "prediction": roy_21_prediction,
        "ROY_prob": y_probability,
    }
)
roy_df

In [None]:
# Rank rookies by probability and express it as a percentage rounded to two
# decimals. The result is written back to `roy_df`, so re-running this cell
# scales the column by 100 again.
roy_df = roy_df.sort_values("ROY_prob", ascending=False).assign(
    ROY_prob=lambda frame: frame["ROY_prob"].map(lambda pct: round(pct * 100, 2))
)
roy_df



In [None]:
# Save the ranked rookie table; the DataFrame index is written as the first column.
roy_df.to_csv("Data/ROY_prediction.csv")