In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree

In [None]:
# Read the teams table from its CSV file and preview the first rows.
TEAMS_CSV_PATH = "../data/teams.csv"
teams_df = pd.read_csv(TEAMS_CSV_PATH)
teams_df.head()

In [None]:
# Prepare the modelling table: remove columns that are not used as features
# and encode the "playoff" target as 0/1.
#
# The cell overwrites `teams_df`, so it is written to be safe to re-run:
# running it twice leaves the frame unchanged instead of raising a KeyError
# (columns already dropped) or turning the encoded target into NaN.
COLUMNS_TO_DROP = [
    "rank", "seeded", "lgID", "tmID", "franchID", "confID", "divID",
    "name", "arena", "firstRound", "semis", "finals",
]
PLAYOFF_ENCODING = {"Y": 1, "N": 0}

# errors="ignore" skips columns that an earlier run of this cell already removed.
teams_df = teams_df.drop(columns=COLUMNS_TO_DROP, errors="ignore")

# Convert "playoff" column to binary (Y: 1, N: 0), unless it already is.
already_encoded = teams_df["playoff"].isin(list(PLAYOFF_ENCODING.values())).all()
if not already_encoded:
    playoff_encoded = teams_df["playoff"].map(PLAYOFF_ENCODING)
    # `.map` yields NaN for any label missing from the encoding; fail loudly
    # here rather than letting NaN targets reach the classifier.
    if playoff_encoded.isna().any():
        unmapped = teams_df.loc[playoff_encoded.isna(), "playoff"].unique().tolist()
        raise ValueError(f"Unexpected values in 'playoff' column: {unmapped}")
    teams_df["playoff"] = playoff_encoded.astype(int)

teams_df.head()

In [None]:
# Walk-forward evaluation: for every test year, train on all strictly earlier
# years and score the predictions on that single year.

# Create the model once; each `fit` call below re-trains it from scratch, so
# after the loop `model` holds the tree from the last fold that was fitted.
model = DecisionTreeClassifier(random_state=42)

# First year used as a test fold; earlier years only ever serve as training data.
# NOTE(review): 7 is hard-coded -- presumably chosen to guarantee enough
# training history for the first fold; confirm against the dataset.
min_year = 7
max_year = teams_df['year'].max()

print(min_year, max_year)

# Encoded target classes (0 = "N", 1 = "Y"). Passing them explicitly keeps the
# confusion matrix 2x2 even when a fold happens to contain a single class.
PLAYOFF_LABELS = [0, 1]

# Accuracy of each evaluated fold, keyed by test year.
accuracy_by_year = {}

for test_year in range(min_year, max_year + 1):
    teams_df_train = teams_df[teams_df['year'] < test_year]
    teams_df_test = teams_df[teams_df['year'] == test_year]

    # A missing year (or no earlier data) would make fit/predict raise on an
    # empty frame; skip such folds instead of aborting the whole evaluation.
    if teams_df_train.empty or teams_df_test.empty:
        print(f"Skipping year={test_year}: no training or test rows available")
        continue

    # NOTE(review): "year" stays in the feature matrix, and the test year is
    # always outside the range seen during training -- consider dropping it.
    X_train = teams_df_train.drop("playoff", axis=1)  # Features
    y_train = teams_df_train["playoff"]  # Target variable

    X_test = teams_df_test.drop("playoff", axis=1)  # Features
    y_test = teams_df_test["playoff"]  # Target variable

    print(f"Train/Test size for year={test_year}:", X_train.shape, X_test.shape, y_train.shape, y_test.shape)

    model.fit(X_train, y_train)

    # Predict the response for the test year
    y_pred = model.predict(X_test)

    accuracy_by_year[test_year] = accuracy_score(y_test, y_pred)

    print(f"\nyear={test_year}, Accuracy:", accuracy_by_year[test_year])
    # zero_division=0 reports 0 (without a warning) for a class that is never predicted.
    print(f"\nyear={test_year}, Classification Report:\n", classification_report(y_test, y_pred, zero_division=0))
    print(f"\nyear={test_year}, Confusion Matrix:\n", confusion_matrix(y_test, y_pred, labels=PLAYOFF_LABELS))

# Single summary figure so the folds do not have to be compared by eye.
if accuracy_by_year:
    mean_accuracy = sum(accuracy_by_year.values()) / len(accuracy_by_year)
    print(f"\nMean accuracy over {len(accuracy_by_year)} test years: {mean_accuracy:.3f}")

In [None]:
# Draw the decision tree currently held in `model` (the last fold fitted by
# the evaluation loop), labelling nodes with the training feature names.
# `plot_tree` and `plt` are imported in the notebook's top import cell.
fig, ax = plt.subplots(figsize=(10, 7))
plot_tree(
    model,
    filled=True,
    # Plain lists are accepted by every scikit-learn release; some releases
    # reject a pandas Index or a boolean for these two parameters.
    feature_names=list(X_train.columns),
    class_names=[str(label) for label in model.classes_],
    rounded=True,
    ax=ax,
)
ax.set_title("Decision tree from the last fitted fold")
plt.show()
