## Decision Tree Classifier Pre-pruning

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from sklearn.datasets import load_iris

In [3]:
iris = load_iris()

In [4]:
## Independent Features
X = pd.DataFrame(iris['data'], columns=iris['feature_names'])

In [5]:
X.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [6]:
## Dependent Feature
y = iris['target']

In [7]:
## Splitting the data into training and testing data
from sklearn.model_selection import train_test_split

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [9]:
## DT Classifier
from sklearn.tree import DecisionTreeClassifier

In [10]:
dtc = DecisionTreeClassifier()

In [11]:
params_grid = {
    "criterion": ['gini', 'entropy', 'log_loss'],
    "splitter": ['best', 'random'],
    "max_depth": [None, 1, 2, 3, 4, 5, 6, 7, 8, 10, 20, 30, 40, 50],
    "max_features": ['auto', 'sqrt', 'log2'],
    # "min_samples_split": [2, 5, 10],
}

In [12]:
from sklearn.model_selection import GridSearchCV

In [13]:
grid = GridSearchCV(dtc, param_grid=params_grid, cv=5, scoring="accuracy", n_jobs=-1)

In [14]:
import warnings
warnings.filterwarnings("ignore")

In [15]:
grid.fit(X_train, y_train)

In [16]:
grid.best_params_

{'criterion': 'gini',
 'max_depth': 8,
 'max_features': 'log2',
 'splitter': 'best'}

In [17]:
grid.best_score_

np.float64(0.9619047619047618)

In [18]:
y_pred = grid.predict(X_test)

In [19]:
## Metrics
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [20]:
print("Accuracy Score: ", accuracy_score(y_test, y_pred))
print("Classification Report: ", classification_report(y_test, y_pred))
print("Confusion Matrix: ", confusion_matrix(y_test, y_pred))

Accuracy Score:  1.0
Classification Report:                precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

Confusion Matrix:  [[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]
