# MLPClassifier
---
*Vefak Murat Akman*



Build a neural network, using `GridSearchCV` to find the best hyperparameters.

`hidden_layer_sizes`: (10,10,10), (10,10,10,10), (10,10,10,10,10), (10,10,10,10,10,10)

`alpha`: 0.00001, 0.0001, 0.001, 0.01, 0.1

Dataset URL = https://archive.ics.uci.edu/ml/datasets/bank+marketing


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


# Load the bank-marketing data; the file uses ';' as its field separator.
dataset = pd.read_csv('bank-additional-full.csv', sep=";")

# Show the distinct levels of a handful of categorical columns.
for heading, column in (
    ('Jobs', 'job'),
    ('Marital', 'marital'),
    ('Default', 'default'),
    ('Housing', 'housing'),
    ('Loan', 'loan'),
):
    print(heading + ':\n', dataset[column].unique())


Jobs:
 ['housemaid' 'services' 'admin.' 'blue-collar' 'technician' 'retired'
 'management' 'unemployed' 'self-employed' 'unknown' 'entrepreneur'
 'student']
Marital:
 ['married' 'single' 'divorced' 'unknown']
Default:
 ['no' 'unknown' 'yes']
Housing:
 ['no' 'yes' 'unknown']
Loan:
 ['no' 'yes' 'unknown']


In [2]:
# Preview the first rows of the frame to sanity-check that the columns parsed correctly.
dataset.head()

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,56,housemaid,married,basic.4y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
1,57,services,married,high.school,unknown,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
2,37,services,married,high.school,no,yes,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
3,40,admin.,married,basic.6y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
4,56,services,married,high.school,no,no,yes,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no


In [3]:
# Summarise every column: name, non-null count and dtype.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41188 entries, 0 to 41187
Data columns (total 21 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   age             41188 non-null  int64  
 1   job             41188 non-null  object 
 2   marital         41188 non-null  object 
 3   education       41188 non-null  object 
 4   default         41188 non-null  object 
 5   housing         41188 non-null  object 
 6   loan            41188 non-null  object 
 7   contact         41188 non-null  object 
 8   month           41188 non-null  object 
 9   day_of_week     41188 non-null  object 
 10  duration        41188 non-null  int64  
 11  campaign        41188 non-null  int64  
 12  pdays           41188 non-null  int64  
 13  previous        41188 non-null  int64  
 14  poutcome        41188 non-null  object 
 15  emp.var.rate    41188 non-null  float64
 16  cons.price.idx  41188 non-null  float64
 17  cons.conf.idx   41188 non-null 

### Checking for null values

In [4]:
# Flag, per column, whether at least one entry is missing.
has_missing = dataset.isna().any()
print('Null Values: ', has_missing)

Null Values:  age               False
job               False
marital           False
education         False
default           False
housing           False
loan              False
contact           False
month             False
day_of_week       False
duration          False
campaign          False
pdays             False
previous          False
poutcome          False
emp.var.rate      False
cons.price.idx    False
cons.conf.idx     False
euribor3m         False
nr.employed       False
y                 False
dtype: bool


### Data Preprocessing


In [7]:
# Preprocessing: encode categoricals, split into train/test, then standardise.
#
# Order matters: the scaler is fitted on the training partition only, so that no
# statistics (mean / variance) of the held-out rows leak into training.
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Fixed seed so that Restart & Run All reproduces the same split.
SPLIT_SEED = 42

# Convert categorical values to integer codes (one fit per column).
# NOTE(review): integer codes impose an arbitrary order on nominal columns such as
# 'job'; one-hot encoding may suit an MLP better — left unchanged here.
labelencoder = LabelEncoder()
categorilcals = ['job','marital','education','default','housing','loan','contact','month','day_of_week','poutcome','y']
for word in categorilcals:
    dataset[word] = labelencoder.fit_transform(dataset[word])
# After the loop `labelencoder` holds the mapping of the last column, 'y'.

# Features are the first 20 columns; the target is the last column ('y').
# NOTE(review): 'duration' is among the features; the dataset's documentation
# reportedly advises dropping it for realistic models — confirm before relying on scores.
X = dataset.iloc[:, :20].values
y = dataset.iloc[:, -1].values

# Split BEFORE scaling. `stratify=y` keeps the class ratio equal in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, shuffle=True, stratify=y, random_state=SPLIT_SEED
)

# Feature scaling: learn mean/std from the training rows only, then apply everywhere.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Keep `X` as a scaled full matrix (as before), now using training-set statistics.
X = sc.transform(X)

In [None]:
# Tune an MLPClassifier with an exhaustive grid search scored by ROC AUC.
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import RepeatedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

# One explicit integer seed for both the CV splitter and the network initialisation.
# The original passed `random_state=True` (a bool) to the splitter; 1 is the integer
# value of that bool, written out explicitly so the intent is clear.
CV_SEED = 1

# 3 folds x 3 repeats = 9 train/validation splits per candidate.
rkf_y = RepeatedKFold(n_splits=3, n_repeats=3, random_state=CV_SEED)

# Seed the estimator too: without it, weight initialisation and batch shuffling
# differ between runs, so best_params_ / best_score_ are not reproducible.
mlp = MLPClassifier(random_state=CV_SEED)

# 4 architectures x 5 regularisation strengths = 20 candidates.
parameters = {
    'hidden_layer_sizes': [(10,10,10), (10,10,10,10), (10,10,10,10,10), (10,10,10,10,10,10)],
    'alpha': [0.00001, 0.0001, 0.001, 0.01, 0.1],
}

mdl = GridSearchCV(
    mlp,
    param_grid=parameters,
    scoring='roc_auc',
    n_jobs=-1,   # use all available cores
    verbose=1,
    cv=rkf_y,
)
# `fit` returns the fitted search object itself, so `scores_mdl` is an alias of `mdl`
# (not a table of scores); per-candidate results live in `mdl.cv_results_`.
scores_mdl = mdl.fit(X_train, y_train)

Fitting 9 folds for each of 20 candidates, totalling 180 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


In [None]:
# Report the winning hyper-parameter combination and the score it achieved.
best_params, best_score = mdl.best_params_, mdl.best_score_
print("Best parameter = ", best_params)
print("Best score = ", best_score)