In [None]:
# 📦 Import necessary libraries
import os
import warnings
from itertools import combinations

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier, plot_tree
warnings.filterwarnings("ignore")

In [None]:
# ---------------------------------------------
# 🗂️ Step 1: Load and visualize dataset
# ---------------------------------------------
columns = ['Variance', 'Skewness', 'Kurtosis', 'Entropy', 'Class']

# The dataset location can be overridden with the BANKNOTE_DATA_PATH
# environment variable so the notebook is not tied to a single machine.
# When the variable is unset, the original hard-coded location is used.
data_path = os.environ.get(
    "BANKNOTE_DATA_PATH",
    r"C:\Users\Asus\Desktop\data_banknote_authentication.txt",
)
# The file has no header row, so column names are supplied explicitly.
df = pd.read_csv(data_path, header=None, names=columns)

# Feature pair scatter plots: one panel per unordered pair of feature columns
# (every column except the trailing 'Class' label), coloured by class.
combs = list(combinations(columns[:-1], 2))
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
# Dedicated loop names (x_col / y_col) avoid leaving a column-name string
# bound to `y` in the notebook namespace.
for ax, (x_col, y_col) in zip(axes.flat, combs):
    sns.scatterplot(data=df, x=x_col, y=y_col, hue='Class', palette='Set2', ax=ax)
    ax.set_title(f"{x_col} vs {y_col}")
fig.suptitle("Feature Pair Visualizations", fontsize=16)
fig.tight_layout()
plt.show()

In [None]:
# ---------------------------------------------
# 🧼 Step 2: Preprocessing
# ---------------------------------------------
X = df.drop("Class", axis=1)
y = df["Class"]

# Split BEFORE scaling so that the scaler's mean/variance are estimated from
# the training fold only; fitting on the full dataset would leak test-set
# statistics into preprocessing. The split itself depends only on the sample
# count, the labels and random_state, so the same rows land in each fold as
# when the split was applied to pre-scaled data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # fit on training data only
X_test = scaler.transform(X_test_raw)        # reuse the training statistics

# Kept so that any later cell referring to `X_scaled` still works; it is the
# full feature matrix transformed with the training-fold statistics.
X_scaled = scaler.transform(X)

In [None]:
# ---------------------------------------------
# ✅ Step 3: Evaluation Function (Fixed – No Double Plots)
# ---------------------------------------------
def evaluate_model(clf, X_test, y_test, criterion, depth):
    """Print a classification report and plot the confusion matrix of a fitted classifier.

    Parameters
    ----------
    clf : fitted classifier
        Must provide ``predict``. ``classes_`` and ``min_samples_split`` are
        read when the estimator exposes them.
    X_test : array-like
        Held-out feature matrix passed to ``clf.predict``.
    y_test : array-like
        True labels for ``X_test``.
    criterion : str
        Split criterion name; used only in the printed header and plot title.
    depth : int
        Maximum tree depth; used only in the printed header and plot title.

    Returns
    -------
    None
        Output is printed and drawn with matplotlib.
    """
    y_pred = clf.predict(X_test)

    # The hyperparameter sweep also varies min_samples_split; report it here
    # (read from the estimator itself) so runs that share criterion and depth
    # can be told apart in both the text output and the figure title.
    min_split = getattr(clf, "min_samples_split", None)
    header_suffix = "" if min_split is None else f" | Min Samples Split: {min_split}"
    title_suffix = "" if min_split is None else f", min_split={min_split}"

    print(f"\n📌 Criterion: {criterion.upper()} | Max Depth: {depth}{header_suffix}")
    print("---------------------------------------------------")
    print(classification_report(y_test, y_pred, digits=4))

    # Pin the row/column order to the classes the model was trained on, so the
    # matrix keeps its full shape (and stays aligned with the display labels)
    # even if a class is absent from both y_test and y_pred. Falls back to
    # scikit-learn's default ordering when the estimator has no `classes_`.
    class_order = getattr(clf, "classes_", None)
    cm = confusion_matrix(y_test, y_pred, labels=class_order)

    # NOTE(review): the display labels assume the first class means "Fake" and
    # the second "Authentic" — confirm against the dataset's label encoding.
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["Fake", "Authentic"])
    disp.plot(cmap='plasma')  # Yellow-purple color scheme
    disp.ax_.set_title(f"Confusion Matrix ({criterion}, depth={depth}{title_suffix})", fontsize=12)
    plt.tight_layout()
    plt.show()

In [None]:
# ---------------------------------------------
# 🔁 Step 4: Try multiple hyperparameters
# ---------------------------------------------
criteria = ["gini", "entropy"]
max_depths = [3, 5, 10]
min_samples = [2, 5]

# Flatten the search grid up front. The comprehension iterates in the same
# order as three nested loops would: criterion outermost, then depth, then
# min_samples_split innermost.
param_grid = [
    (crit, max_d, min_s)
    for crit in criteria
    for max_d in max_depths
    for min_s in min_samples
]

# Fit one tree per grid point and report its performance on the test split.
for criterion, depth, min_split in param_grid:
    tree_params = dict(
        criterion=criterion,
        max_depth=depth,
        min_samples_split=min_split,
        random_state=42,
    )
    clf = DecisionTreeClassifier(**tree_params).fit(X_train, y_train)
    evaluate_model(clf, X_test, y_test, criterion, depth)

In [None]:
# ---------------------------------------------
# 🌳 Step 5: Final tree + feature importance
# ---------------------------------------------
def plot_final_tree_and_importance(clf, criterion_label, feature_names=None,
                                   class_names=("Fake", "Authentic")):
    """Draw a fitted decision tree and a ranked bar chart of its feature importances.

    Parameters
    ----------
    clf : fitted tree estimator
        Must be accepted by ``plot_tree`` and expose ``feature_importances_``.
    criterion_label : str
        Text inserted into both figure titles.
    feature_names : sequence of str, optional
        Names of the model's input features, in training order. When omitted,
        the notebook-level ``columns`` list (minus its last entry) is used,
        which is what this function always did before the parameter existed.
    class_names : sequence of str, optional
        Labels shown in the tree nodes, passed through to ``plot_tree``.

    Raises
    ------
    ValueError
        If the number of feature names differs from the number of importances.
    """
    if feature_names is None:
        feature_names = columns[:-1]  # notebook global; last entry is the label column
    feature_names = list(feature_names)

    importances = clf.feature_importances_
    # Fail early with a clear message instead of an indexing error further down.
    if len(feature_names) != len(importances):
        raise ValueError(
            f"Expected {len(importances)} feature names, got {len(feature_names)}"
        )

    # Tree visualization
    fig_tree, ax_tree = plt.subplots(figsize=(16, 8))
    plot_tree(
        clf,
        feature_names=feature_names,
        class_names=list(class_names),
        filled=True,
        rounded=True,
        ax=ax_tree
    )
    ax_tree.set_title(f"Decision Tree (criterion = '{criterion_label}')", fontsize=14)
    plt.show()

    # Feature importance, sorted from most to least important
    indices = np.argsort(importances)[::-1]
    sorted_features = np.array(feature_names)[indices]

    fig_imp, ax_imp = plt.subplots(figsize=(8, 5))
    # NOTE(review): recent seaborn releases appear to warn when `palette` is
    # given without `hue` — confirm against the installed version before changing.
    sns.barplot(x=importances[indices], y=sorted_features, palette='plasma', ax=ax_imp)
    ax_imp.set_title(f"Feature Importance (criterion = '{criterion_label}')", fontsize=13)
    ax_imp.set_xlabel("Importance Score")
    ax_imp.set_ylabel("Features")
    fig_imp.tight_layout()
    plt.show()

In [None]:
# Final models
# Train one depth-5 tree per split criterion. `fit` returns the estimator
# itself, so each fitted model can be bound directly to its name.
final_entropy, final_gini = (
    DecisionTreeClassifier(criterion=criterion_name, max_depth=5, random_state=42).fit(X_train, y_train)
    for criterion_name in ("entropy", "gini")
)

# Visualise each fitted tree and its feature importances, entropy first.
for criterion_name, fitted_tree in (("entropy", final_entropy), ("gini", final_gini)):
    plot_final_tree_and_importance(fitted_tree, criterion_name)