 ### Bank Marketing campaign outcome prediction with deep learning

This ➡️ dataset contains banking marketing campaign data and we can use it to optimize marketing campaigns to attract more customers to term deposit subscription.

What is a Term Deposit?

A Term deposit is a deposit that a bank or a financial institution offers with a fixed rate (often better than just opening deposit account) in which your money will be returned back at a specific maturity time.

Dataset link: https://drive.google.com/file/d/1kVnOzZ84avAAY1mA2I_Oh6FH3UPEb-_3/view?usp=sharing

#### Importing necessary libraries

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

#### Loading my dataset and perform data exploration

In [5]:
df = pd.read_csv("bank.csv")

In [7]:
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,deposit
0,59,admin.,married,secondary,no,2343,yes,no,unknown,5,may,1042,1,-1,0,unknown,yes
1,56,admin.,married,secondary,no,45,no,no,unknown,5,may,1467,1,-1,0,unknown,yes
2,41,technician,married,secondary,no,1270,yes,no,unknown,5,may,1389,1,-1,0,unknown,yes
3,55,services,married,secondary,no,2476,yes,no,unknown,5,may,579,1,-1,0,unknown,yes
4,54,admin.,married,tertiary,no,184,no,no,unknown,5,may,673,2,-1,0,unknown,yes


In [9]:
df.shape

(11162, 17)

In [16]:
df.isnull().sum()

age          0
job          0
marital      0
education    0
default      0
balance      0
housing      0
loan         0
contact      0
day          0
month        0
duration     0
campaign     0
pdays        0
previous     0
poutcome     0
deposit      0
dtype: int64

In [18]:
df.duplicated().sum()

0

Dataset has no missing values

In [21]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11162 entries, 0 to 11161
Data columns (total 17 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   age        11162 non-null  int64 
 1   job        11162 non-null  object
 2   marital    11162 non-null  object
 3   education  11162 non-null  object
 4   default    11162 non-null  object
 5   balance    11162 non-null  int64 
 6   housing    11162 non-null  object
 7   loan       11162 non-null  object
 8   contact    11162 non-null  object
 9   day        11162 non-null  int64 
 10  month      11162 non-null  object
 11  duration   11162 non-null  int64 
 12  campaign   11162 non-null  int64 
 13  pdays      11162 non-null  int64 
 14  previous   11162 non-null  int64 
 15  poutcome   11162 non-null  object
 16  deposit    11162 non-null  object
dtypes: int64(7), object(10)
memory usage: 1.4+ MB


In [23]:
df["job"].value_counts()

job
management       2566
blue-collar      1944
technician       1823
admin.           1334
services          923
retired           778
self-employed     405
student           360
unemployed        357
entrepreneur      328
housemaid         274
unknown            70
Name: count, dtype: int64

In [25]:
df["marital"].value_counts()

marital
married     6351
single      3518
divorced    1293
Name: count, dtype: int64

In [27]:
df["education"].value_counts()

education
secondary    5476
tertiary     3689
primary      1500
unknown       497
Name: count, dtype: int64

In [29]:
df["default"].value_counts()

default
no     10994
yes      168
Name: count, dtype: int64

In [31]:
df["housing"].value_counts()

housing
no     5881
yes    5281
Name: count, dtype: int64

In [33]:
df["loan"].value_counts()

loan
no     9702
yes    1460
Name: count, dtype: int64

In [35]:
df["contact"].value_counts()

contact
cellular     8042
unknown      2346
telephone     774
Name: count, dtype: int64

In [37]:
df["month"].value_counts()

month
may    2824
aug    1519
jul    1514
jun    1222
nov     943
apr     923
feb     776
oct     392
jan     344
sep     319
mar     276
dec     110
Name: count, dtype: int64

In [39]:
df["job"].value_counts()

job
management       2566
blue-collar      1944
technician       1823
admin.           1334
services          923
retired           778
self-employed     405
student           360
unemployed        357
entrepreneur      328
housemaid         274
unknown            70
Name: count, dtype: int64

In [41]:
df["poutcome"].value_counts()

poutcome
unknown    8326
failure    1228
success    1071
other       537
Name: count, dtype: int64

In [43]:
df["deposit"].value_counts()

deposit
no     5873
yes    5289
Name: count, dtype: int64

#### Encoding categorical columns

In [45]:
df['deposit'] = df['deposit'].map({'yes': 1, 'no': 0})
df['contact'] = df['contact'].map( {'cellular': 1, 'telephone': 2, 'unknown': 0})
df['loan'] = df['loan'].map({'yes': 1, 'no': 0})
df['housing'] = df['housing'].map({'yes': 1, 'no': 0})
df['default'] = df['default'].map({'yes': 1, 'no': 0})
df['marital'] = df['marital'].map({'married': 1, 'single': 2, 'divorced': 3})

In [47]:
df = pd.get_dummies(df, columns=['job', 'education', 'month', 'poutcome'], drop_first=True)

In [49]:
from sklearn.preprocessing import LabelEncoder

# Initialize LabelEncoder
le = LabelEncoder()

# Encode the 'deposit' column (target variable)
df['deposit'] = le.fit_transform(df['deposit'])

In [51]:
df.head()

Unnamed: 0,age,marital,default,balance,housing,loan,contact,day,duration,campaign,...,month_jul,month_jun,month_mar,month_may,month_nov,month_oct,month_sep,poutcome_other,poutcome_success,poutcome_unknown
0,59,1,0,2343,1,0,0,5,1042,1,...,False,False,False,True,False,False,False,False,False,True
1,56,1,0,45,0,0,0,5,1467,1,...,False,False,False,True,False,False,False,False,False,True
2,41,1,0,1270,1,0,0,5,1389,1,...,False,False,False,True,False,False,False,False,False,True
3,55,1,0,2476,1,0,0,5,579,1,...,False,False,False,True,False,False,False,False,False,True
4,54,1,0,184,0,0,0,5,673,2,...,False,False,False,True,False,False,False,False,False,True


In [53]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11162 entries, 0 to 11161
Data columns (total 41 columns):
 #   Column               Non-Null Count  Dtype
---  ------               --------------  -----
 0   age                  11162 non-null  int64
 1   marital              11162 non-null  int64
 2   default              11162 non-null  int64
 3   balance              11162 non-null  int64
 4   housing              11162 non-null  int64
 5   loan                 11162 non-null  int64
 6   contact              11162 non-null  int64
 7   day                  11162 non-null  int64
 8   duration             11162 non-null  int64
 9   campaign             11162 non-null  int64
 10  pdays                11162 non-null  int64
 11  previous             11162 non-null  int64
 12  deposit              11162 non-null  int64
 13  job_blue-collar      11162 non-null  bool 
 14  job_entrepreneur     11162 non-null  bool 
 15  job_housemaid        11162 non-null  bool 
 16  job_management       1

**Deep learning**

In [55]:
import tensorflow as tf  # TensorFlow library for building neural networks
from tensorflow.keras.models import Sequential  # Sequential model for building a feedforward neural network
from tensorflow.keras.layers import Dense, Dropout  # Dense layer for fully connected layers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam  # Adam optimizer for gradient descent
from sklearn.model_selection import train_test_split  # Function to split data into training and test sets
from sklearn.preprocessing import StandardScaler  # Standardization of features (scaling)

#### Splitting the dataset

In [61]:
X = df.drop(columns=["deposit"])
y = df["deposit"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [63]:
X_train.shape

(8929, 40)

#### Scaling my data

In [65]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#### Building my model

In [67]:
model = Sequential()

#input layer
model.add(Dense(64, activation='relu', input_dim=X_train.shape[1]))
model.add(Dropout(0.3))

#hidden layers
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))

#output layer
model.add(Dense(1, activation='sigmoid'))

#Compiling the layers
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

In [69]:
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

In [71]:
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, batch_size=32, callbacks=[early_stopping])

Epoch 1/100
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.5303 - loss: 0.7127 - val_accuracy: 0.6588 - val_loss: 0.6391
Epoch 2/100
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.6489 - loss: 0.6377 - val_accuracy: 0.7358 - val_loss: 0.5766
Epoch 3/100
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7074 - loss: 0.5876 - val_accuracy: 0.7653 - val_loss: 0.5170
Epoch 4/100
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7328 - loss: 0.5405 - val_accuracy: 0.7819 - val_loss: 0.4762
Epoch 5/100
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7569 - loss: 0.5106 - val_accuracy: 0.8003 - val_loss: 0.4499
Epoch 6/100
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7691 - loss: 0.4935 - val_accuracy: 0.8047 - val_loss: 0.4319
Epoch 7/100
[1m280/28

In [72]:
# Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test)

print(f"Test Accuracy: {test_acc * 100:.2f}%")

[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8290 - loss: 0.3912
Test Accuracy: 84.10%
