<a href="https://colab.research.google.com/github/varshith-k31/ML_23AG1A66F4/blob/main/Untitled5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Load the dataset
# The wine dataset is great for classification examples.
wine = load_wine()
X = pd.DataFrame(wine.data, columns=wine.feature_names)
y = pd.Series(wine.target)

# Split data into training and testing sets
# 80% for training, 20% for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the Decision Tree model
# Using default parameters first.
dt_default = DecisionTreeClassifier(random_state=42)
dt_default.fit(X_train, y_train)

# Make predictions and evaluate the model
y_pred_default = dt_default.predict(X_test)
accuracy_default = accuracy_score(y_test, y_pred_default)

print(f"Accuracy of default Decision Tree: {accuracy_default:.4f}")

Accuracy of default Decision Tree: 0.9444


In [7]:

from sklearn.model_selection import GridSearchCV

# 1. Define the parameter grid to search
param_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 5, 10, 15, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# 2. Set up GridSearchCV
# The estimator is our Decision Tree model.
# cv=5 means 5-fold cross-validation.
# n_jobs=-1 uses all available CPU cores to speed up the process.
dt = DecisionTreeClassifier(random_state=42)
grid_search = GridSearchCV(estimator=dt, param_grid=param_grid,
                           cv=5, n_jobs=-1, verbose=1, scoring='accuracy')

# 3. Fit GridSearchCV to the training data
# This will train the model with every parameter combination.
grid_search.fit(X_train, y_train)

# 4. Get the best parameters and the best model
print("\nBest Parameters found by GridSearchCV:")
print(grid_search.best_params_)

best_dt = grid_search.best_estimator_

# 5. Evaluate the tuned model
y_pred_tuned = best_dt.predict(X_test)
accuracy_tuned = accuracy_score(y_test, y_pred_tuned)

print(f"\nAccuracy of default Decision Tree: {accuracy_default:.4f}")
print(f"Accuracy of tuned Decision Tree:   {accuracy_tuned:.4f} ✨")

Fitting 5 folds for each of 90 candidates, totalling 450 fits

Best Parameters found by GridSearchCV:
{'criterion': 'gini', 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 10}

Accuracy of default Decision Tree: 0.9444
Accuracy of tuned Decision Tree:   0.9444 ✨
