In [1]:
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, export_text


In [3]:
# Read the bank-marketing CSV into `df`, parsing it with ';' as the delimiter.
# NOTE(review): the recorded tree output further down contains an
# `education_tertiary` column and a test support of 9043 rows, which looks like
# the older `bank-full.csv` rather than `bank-additional-full.csv` -- confirm
# which file is actually stored under this name.
df = pd.read_csv(filepath_or_buffer='bank-additional-full.csv', sep=';')


In [4]:
# Work with a subset of the raw columns. Note that `features` also carries the
# target column 'y' so that it survives the column selection below.
features = ['age', 'job', 'marital', 'education', 'default', 'housing', 'loan', 'campaign', 'pdays', 'previous', 'poutcome', 'y']

# String-valued columns that must be one-hot encoded before modelling.
categorical_cols = ['job', 'marital', 'education', 'default', 'housing', 'loan', 'poutcome']

# Select the subset and one-hot encode it in one step. drop_first=True drops
# the first level of every encoded column.
# This cell overwrites `df`, so re-run the load cell before re-running it.
df = pd.get_dummies(df[features], columns=categorical_cols, drop_first=True)

# Convert 'y' (target variable) to 0/1. Series.map turns every label that is
# not a key of the mapping into NaN, so check for that here and fail with a
# clear message instead of letting NaN targets reach the model.
target = df['y'].map({'no': 0, 'yes': 1})
unmapped = target.isna()
if unmapped.any():
    bad_labels = sorted(df.loc[unmapped, 'y'].astype(str).unique())
    raise ValueError(f"Unexpected values in target column 'y': {bad_labels}")
df['y'] = target.astype('int64')


In [5]:
# Separate the target column from the predictor columns.
y = df['y']
X = df.drop(columns=['y'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [6]:
# Decision tree with default hyperparameters apart from a fixed seed, so that
# repeated runs build the same tree.
model = DecisionTreeClassifier(random_state=42)

# Train on the training split only; the test split stays unseen until scoring.
model.fit(X=X_train, y=y_train)


In [7]:
# Class predictions of the fitted tree for the held-out test rows.
y_pred = model.predict(X=X_test)


In [8]:
# Score the test-set predictions: overall accuracy, the confusion matrix, and
# the per-class precision / recall / F1 report.
classification_rep = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# A single newline-separated print emits the three sections in the same order.
print(
    f'Accuracy: {accuracy}',
    f'Confusion Matrix:\n{conf_matrix}',
    f'Classification Report:\n{classification_rep}',
    sep='\n',
)


Accuracy: 0.8437465442883999
Confusion Matrix:
[[7384  568]
 [ 845  246]]
Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.93      0.91      7952
           1       0.30      0.23      0.26      1091

    accuracy                           0.84      9043
   macro avg       0.60      0.58      0.59      9043
weighted avg       0.83      0.84      0.83      9043



In [9]:
# Render the fitted tree's decision rules as indented text, labelling each
# split with the matching column name of X, and print the result.
tree_rules = export_text(model, feature_names=list(X.columns))
print(tree_rules)


|--- poutcome_success <= 0.50
|   |--- age <= 60.50
|   |   |--- housing_yes <= 0.50
|   |   |   |--- pdays <= 13.00
|   |   |   |   |--- age <= 26.50
|   |   |   |   |   |--- age <= 25.50
|   |   |   |   |   |   |--- campaign <= 8.50
|   |   |   |   |   |   |   |--- loan_yes <= 0.50
|   |   |   |   |   |   |   |   |--- marital_single <= 0.50
|   |   |   |   |   |   |   |   |   |--- education_tertiary <= 0.50
|   |   |   |   |   |   |   |   |   |   |--- age <= 23.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |   |   |   |--- age >  23.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- education_tertiary >  0.50
|   |   |   |   |   |   |   |   |   |   |--- job_housemaid <= 0.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 3
|   |   |   |   |   |   |   |   |   |   |--- job_housemaid >  0.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0