# Machine Learning Process
# 1. Get Data


In [None]:
import pandas as pd
# Location of the raw CSV, kept in one named constant so it is changed in one place.
# NOTE(review): "/content/" looks like a Colab upload directory — confirm and
# adjust when running this notebook elsewhere.
DATA_PATH = "/content/bank-data.csv"

# Load the raw dataset into the working frame that every later cell reads.
df = pd.read_csv(DATA_PATH)

# Preview the first rows only rather than dumping the whole frame.
df.head()

# 2. Clean Data and Data Preparation


In [None]:
# Show the frame's dimensions as (rows, columns).
df.shape

In [None]:
# Summary statistics (count, mean, std, quartiles) using pandas' describe() defaults.
df.describe()

In [None]:
# Inspect each column's dtype to see which columns are numeric and which are not.
df.dtypes

In [None]:
# Count missing values per column before any imputation.
df.isna().sum()

In [None]:
# Fill missing 'income' values with the column mean so no row has to be dropped.
# NOTE(review): the mean is computed over the whole frame, i.e. before the
# train/test split performed further down — confirm that is acceptable.
income_mean = df['income'].mean()
df['income'] = df['income'].fillna(income_mean)

In [None]:
# Re-count missing values per column after the 'income' imputation.
df.isna().sum()

In [None]:
# Re-inspect the dtypes before the categorical columns are label-encoded.
df.dtypes

In [None]:
from sklearn.preprocessing import LabelEncoder

# Columns converted to integer codes, listed in the order they are encoded.
CATEGORICAL_COLUMNS = ['sex', 'region', 'married', 'car',
                       'save_act', 'current_act', 'mortgage', 'credit']

# Keep one fitted encoder per column. Re-fitting a single shared encoder would
# discard every mapping except the last one, making it impossible to decode
# the integer codes (encoders[col].classes_ / .inverse_transform) or to encode
# new samples consistently.
# NOTE(review): re-running this cell re-fits the encoders on the already
# encoded integers, so the original label strings are only recoverable after
# the first run on freshly loaded data.
encoders = {}
for column_name in CATEGORICAL_COLUMNS:
    column_encoder = LabelEncoder()
    df[column_name] = column_encoder.fit_transform(df[column_name])
    encoders[column_name] = column_encoder

# Backward-compatible name: as before, `labelencoder` ends up fitted on 'credit'.
labelencoder = encoders['credit']

In [None]:
# Preview the first 20 rows to check the result of the label encoding.
df.head(n=20)

In [None]:
# Mean income within each (encoded) 'credit' class.
income_grouped_by_credit = df.groupby('credit')['income']
income_grouped_by_credit.mean()

In [None]:
# Check the dtypes again now that the categorical columns have been label-encoded.
df.dtypes

In [None]:
# Remove the 'id' column so it is not picked up as a model feature (the
# feature matrix below is built from every remaining non-target column).
# errors='ignore' keeps this cell re-runnable: without it a second run raises
# KeyError because 'id' has already been dropped.
df = df.drop(columns=['id'], errors='ignore')

In [None]:
# List the column names that remain after dropping 'id'.
df.columns

# 3. Select Model and Model Construction

3.1 Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split

# Feature matrix: every column except the target. Target: the 'credit' column.
x = df.drop(columns=['credit'])
y = df['credit']

# Hold out 30% of the rows for testing. With shuffle=False the rows keep their
# file order, so the test set is the tail of the frame and the split is
# deterministic.
# NOTE(review): an unshuffled split is only representative if the file is not
# sorted in some meaningful way — confirm.
train_data, test_data, train_y, test_y = train_test_split(
    x,
    y,
    test_size=0.30,
    shuffle=False,
)

# Create the Gaussian Naive Bayes classifier and fit it on the training rows
# (fit() returns the estimator itself).
NBclassifier = GaussianNB().fit(train_data, train_y)

# Predict the class of every held-out row.
y_predict = NBclassifier.predict(test_data)

In [None]:
# Display the predicted class codes for the held-out rows.
y_predict

In [None]:
from sklearn import metrics

# Score the Naive Bayes model from its own predictions instead of reading the
# shared `y_predict` global: the Decision Tree section reassigns that name, so
# re-running this cell out of order would silently report the wrong model.
nb_test_predictions = NBclassifier.predict(test_data)
print("Accuracy:", metrics.accuracy_score(test_y, nb_test_predictions))

In [None]:
from sklearn.metrics import classification_report,confusion_matrix

# Per-class precision / recall / F1 for the Naive Bayes model. Predictions are
# taken from NBclassifier directly rather than from the shared `y_predict`
# global, which the Decision Tree section reassigns.
nb_test_predictions = NBclassifier.predict(test_data)
print(classification_report(test_y, nb_test_predictions))

In [None]:
import matplotlib.pyplot as plt
from mlxtend.plotting import plot_confusion_matrix
from sklearn.metrics import confusion_matrix  # imported here so the cell runs on its own

# Confusion matrix for the Naive Bayes model, built from the model's own
# predictions rather than the shared `y_predict` global (which the Decision
# Tree section reassigns).
nb_test_predictions = NBclassifier.predict(test_data)
cm = confusion_matrix(test_y, nb_test_predictions)

fig, ax = plot_confusion_matrix(conf_mat=cm)
# Title the figure so it can be told apart from the Decision Tree plot.
ax.set_title("Naive Bayes: confusion matrix")
plt.show()

In [None]:
# Display the feature matrix; its column order is the order expected for hand-built samples.
x

In [None]:
# One hand-encoded, previously unseen sample. The values must follow the
# training column order, and categorical fields must use the same integer
# codes as the label-encoding step.
# Wrapping the row in a DataFrame with the training columns (a) avoids
# scikit-learn's "X does not have valid feature names" warning for a model
# fitted on a DataFrame and (b) fails immediately if the number of values does
# not match the number of features.
unseen_sample = pd.DataFrame(
    [[41, 0, 0, 20000, 1, 2, 1, 1, 1, 0]],
    columns=train_data.columns,
)
y_predict_NB_unseen = NBclassifier.predict(unseen_sample)
print(y_predict_NB_unseen)

3.2 Decision Tree


In [None]:
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn import tree
from sklearn.model_selection import train_test_split

# Seed for the tree's internal randomness. scikit-learn permutes the candidate
# features at every split, so without a fixed seed ties between equally good
# splits can be broken differently from run to run.
RANDOM_SEED = 42

# Feature matrix: every column except the target. Target: the 'credit' column.
x = df.drop(['credit'],axis=1)
y = df.credit

# Split dataset into training set and test set: the last 30% of rows are held
# out, and shuffle=False keeps the file order so the split is deterministic.
train_data, test_data, train_y, test_y = train_test_split(x, y, test_size=0.30, shuffle=False)

# Create Decision Tree classifier object (entropy criterion, unlimited depth).
decision_tree_classifier = DecisionTreeClassifier(
    criterion="entropy",
    max_depth=None,
    random_state=RANDOM_SEED,
)

# Train Decision Tree classifier
decision_tree_classifier.fit(train_data,train_y)

# Predict the held-out rows and display the predicted class codes.
y_predict = decision_tree_classifier.predict(test_data)
y_predict

In [None]:
# Display the fitted estimator's repr (shows its configuration).
decision_tree_classifier

In [None]:
from sklearn import metrics

# Score the Decision Tree from its own predictions instead of reading the
# shared `y_predict` global: the Naive Bayes cell assigns the same name, so
# re-running cells out of order would silently report the wrong model.
dt_test_predictions = decision_tree_classifier.predict(test_data)
print("Accuracy:", metrics.accuracy_score(test_y, dt_test_predictions))

In [None]:
from sklearn.metrics import classification_report,confusion_matrix

# Per-class precision / recall / F1 for the Decision Tree. Predictions are
# taken from decision_tree_classifier directly rather than from the shared
# `y_predict` global, which the Naive Bayes cell also assigns.
dt_test_predictions = decision_tree_classifier.predict(test_data)
print(classification_report(test_y, dt_test_predictions))

In [None]:
import matplotlib.pyplot as plt
from mlxtend.plotting import plot_confusion_matrix
from sklearn.metrics import confusion_matrix  # imported here so the cell runs on its own

# Confusion matrix for the Decision Tree, built from the model's own
# predictions rather than the shared `y_predict` global (which the Naive Bayes
# cell also assigns).
dt_test_predictions = decision_tree_classifier.predict(test_data)
cm = confusion_matrix(test_y, dt_test_predictions)

fig, ax = plot_confusion_matrix(conf_mat=cm)
# Title the figure so it can be told apart from the Naive Bayes plot.
ax.set_title("Decision Tree: confusion matrix")
plt.show()

In [None]:
# One hand-encoded, previously unseen sample. The values must follow the
# training column order, and categorical fields must use the same integer
# codes as the label-encoding step.
# Wrapping the row in a DataFrame with the training columns (a) avoids
# scikit-learn's "X does not have valid feature names" warning for a model
# fitted on a DataFrame and (b) fails immediately if the number of values does
# not match the number of features.
unseen_sample = pd.DataFrame(
    [[41, 0, 0, 20000, 1, 2, 1, 1, 1, 0]],
    columns=train_data.columns,
)
y_predict_DT_unseen = decision_tree_classifier.predict(unseen_sample)
print(y_predict_DT_unseen)

Decision Tree: Data Visualization Graph 

In [None]:
import pydotplus
from sklearn import tree
from IPython.display import Image

# Column labels of the feature matrix, used to annotate the split nodes.
data_feature_names = x.columns

# Display names for the target classes, in ascending order of the encoded value.
# NOTE(review): assumes code 0 means 'no' and code 1 means 'yes' — confirm
# against the label encoding of 'credit'.
creditclass = ['no', 'yes']

# Export the fitted tree as Graphviz DOT source. The feature names are passed
# as a plain list of str, the form accepted across scikit-learn releases.
# NOTE(review): passing the pandas Index directly is believed to be rejected
# by parameter validation in some scikit-learn versions — confirm.
dot_data = export_graphviz(
    decision_tree_classifier,
    out_file=None,
    feature_names=list(data_feature_names),
    class_names=creditclass,
    filled=True,
    rounded=True,
)

# Render the DOT source, save it as a PNG next to the notebook, and show it inline.
graph = pydotplus.graph_from_dot_data(dot_data)
graph.write_png('DT_tree.png')
Image(graph.create_png())