In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt
import seaborn as sns

In [2]:
df = pd.read_csv('./logisticReg.csv')
df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [3]:
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()

In [5]:
df['Gender'] = encoder.fit_transform(df['Gender'])

In [6]:
df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,1,19,19000,0
1,15810944,1,35,20000,0
2,15668575,0,26,43000,0
3,15603246,0,27,57000,0
4,15804002,1,19,76000,0


In [8]:
df = df.rename(columns={'User ID': 'user'})
df = df.drop('user',axis=1)

In [9]:
df.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,1,19,19000,0
1,1,35,20000,0
2,0,26,43000,0
3,0,27,57000,0
4,1,19,76000,0


In [10]:
X = df.drop(['Purchased'],axis=1)
y = df['Purchased']

In [11]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [14]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import  accuracy_score, confusion_matrix, f1_score

In [22]:
dectree = DecisionTreeClassifier(criterion='log_loss',splitter='best',max_depth=4)
dectree.fit(X_train,y_train)
y_pred_dt = dectree.predict(X_test)
dt_f1 = f1_score(y_test, y_pred_dt)
print('Accuracy Score for Decision Trees: ', accuracy_score(y_test, y_pred_dt))
print('Confusion Matrix for Decision Trees: \n', confusion_matrix(y_test, y_pred_dt))
print('F1 Score for Decision Trees: ', f1_score(y_test, y_pred_dt))

Accuracy Score for Decision Trees:  0.9125
Confusion Matrix for Decision Trees: 
 [[47  5]
 [ 2 26]]
F1 Score for Decision Trees:  0.8813559322033899


In [23]:
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
y_pred_lr = logreg.predict(X_test)
lr_f1 = f1_score(y_test, y_pred_lr)
print('Accuracy Score for Logistic Regression: ', accuracy_score(y_test, y_pred_lr))
print('Confusion Matrix for Logistic Regression: \n', confusion_matrix(y_test, y_pred_lr))
print('F1 Score for Logistic Regression: ', f1_score(y_test, y_pred_lr))

Accuracy Score for Decision Trees:  0.65
Confusion Matrix for Decision Trees: 
 [[52  0]
 [28  0]]
F1 Score for Decision Trees:  0.0


In [24]:
nb = GaussianNB()
nb.fit(X_train,y_train)
y_pred_nb = nb.predict(X_test)
nb_f1 = f1_score(y_test, y_pred_nb)
print('Accuracy Score for Naive Bayes: ', accuracy_score(y_test, y_pred_nb))
print('Confusion Matrix for Naive Bayes: \n', confusion_matrix(y_test, y_pred_nb))
print('F1 Score for Naive Bayes: ', f1_score(y_test, y_pred_nb))

Accuracy Score for Naive Bayes:  0.9375
Confusion Matrix for Naive Bayes: 
 [[50  2]
 [ 3 25]]
F1 Score for Naive Bayes:  0.9090909090909091


In [34]:
from sklearn.ensemble import VotingClassifier
estimators = [('dt',dectree), ('lr', logreg),( 'nb', nb)]

In [35]:
voting_classifier_hard = VotingClassifier(estimators, voting='hard')
voting_classifier_soft = VotingClassifier(estimators, voting='soft')

In [36]:
voting_classifier_hard.fit(X_train, y_train)
voting_classifier_soft.fit(X_train, y_train)

In [39]:
y_pred_votinghard = voting_classifier_hard.predict(X_test)
y_pred_votingsoft = voting_classifier_soft.predict(X_test)

print('Accuracy Score for Voting Classifier (Hard Voting): ', accuracy_score(y_test, y_pred_votinghard))
print('Confusion Matrix for Voting Classifier (Hard Voting): \n', confusion_matrix(y_test, y_pred_votinghard))
print('F1 Score for Voting Classifier (Hard Voting): ', f1_score(y_test, y_pred_votinghard))
print('\n\n')
print('Accuracy Score for Voting Classifier (Soft Voting): ', accuracy_score(y_test, y_pred_votingsoft))
print('Confusion Matrix for Voting Classifier (Soft Voting): \n', confusion_matrix(y_test, y_pred_votingsoft))
print('F1 Score for Voting Classifier (Soft Voting): ', f1_score(y_test, y_pred_votingsoft))

Accuracy Score for Voting Classifier (Hard Voting):  0.925
Confusion Matrix for Voting Classifier (Hard Voting): 
 [[50  2]
 [ 4 24]]
F1 Score for Voting Classifier (Hard Voting):  0.888888888888889



Accuracy Score for Voting Classifier (Soft Voting):  0.9125
Confusion Matrix for Voting Classifier (Soft Voting): 
 [[48  4]
 [ 3 25]]
F1 Score for Voting Classifier (Soft Voting):  0.8771929824561403
