In [1]:
pip install ucimlrepo

Note: you may need to restart the kernel to use updated packages.


In [2]:
# libraries
from ucimlrepo import fetch_ucirepo
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from itertools import combinations
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy.stats import chi2_contingency, boxcox, gaussian_kde, ttest_ind
from sklearn.preprocessing import StandardScaler, MinMaxScaler,LabelEncoder
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import pickle

In [3]:
# fetch dataset
credit_approval = fetch_ucirepo(id=27)

# data (as pandas dataframes)
X = credit_approval.data.features
y = credit_approval.data.targets

# metadata
print(credit_approval.metadata)

# variable information
print(credit_approval.variables)

{'uci_id': 27, 'name': 'Credit Approval', 'repository_url': 'https://archive.ics.uci.edu/dataset/27/credit+approval', 'data_url': 'https://archive.ics.uci.edu/static/public/27/data.csv', 'abstract': 'This data concerns credit card applications; good mix of attributes', 'area': 'Business', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 690, 'num_features': 15, 'feature_types': ['Categorical', 'Integer', 'Real'], 'demographics': [], 'target_col': ['A16'], 'index_col': None, 'has_missing_values': 'yes', 'missing_values_symbol': 'NaN', 'year_of_dataset_creation': 1987, 'last_updated': 'Wed Aug 23 2023', 'dataset_doi': '10.24432/C5FS30', 'creators': ['J. R. Quinlan'], 'intro_paper': None, 'additional_info': {'summary': 'This file concerns credit card applications.  All attribute names and values have been changed to meaningless symbols to protect confidentiality of the data.\r\n  \r\nThis dataset is interesting because there is a good mix of attributes --

In [4]:
df = credit_approval.data.original
df

Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,A16
0,b,30.83,0.000,u,g,w,v,1.25,t,t,1,f,g,202.0,0,+
1,a,58.67,4.460,u,g,q,h,3.04,t,t,6,f,g,43.0,560,+
2,a,24.50,0.500,u,g,q,h,1.50,t,f,0,f,g,280.0,824,+
3,b,27.83,1.540,u,g,w,v,3.75,t,t,5,t,g,100.0,3,+
4,b,20.17,5.625,u,g,w,v,1.71,t,f,0,f,s,120.0,0,+
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
685,b,21.08,10.085,y,p,e,h,1.25,f,f,0,f,g,260.0,0,-
686,a,22.67,0.750,u,g,c,v,2.00,f,t,2,t,g,200.0,394,-
687,a,25.25,13.500,y,p,ff,ff,2.00,f,t,1,t,g,200.0,1,-
688,b,17.92,0.205,u,g,aa,v,0.04,f,f,0,f,g,280.0,750,-


In [5]:
# Columns with alphabetic labels
missing_columns = ['A1', 'A2', 'A4', 'A5', 'A6', 'A7', 'A14']

df = df.dropna(subset=missing_columns)

In [6]:
# Box Cox (remove/ minimize skew)
cont_cols = ['A2', 'A3', 'A8', 'A11', 'A14','A15']

# Array to store lambda, mean, and variance
cont_cols_stats = []

for col in cont_cols:
  new_col, given_lambda = boxcox(df[col] + 0.1)
  df[col] = new_col
  # Calculate mean and standard deviation (square root of variance)
  mean_val = df[col].mean()
  std_dev = np.sqrt(df[col].var())
    
  # Append the lambda, mean, and variance to stats_array
  cont_cols_stats.append((col, given_lambda, mean_val, std_dev))

# Standardization (mean = 0, variance = 1)
scaler = StandardScaler()
df[['A2', 'A3', 'A8', 'A11', 'A14','A15']] = scaler.fit_transform(df[['A2', 'A3', 'A8', 'A11', 'A14','A15']])

for stat in cont_cols_stats:
    print(f"{stat[0]} -> lambda: {stat[1]}, mean: {stat[2]}, std dev: {stat[3]}")

A2 -> lambda: -0.5021287318128576, mean: 1.6230054024379794, std dev: 0.0627423970211426
A3 -> lambda: 0.16780575394858463, mean: 1.2154677219893335, std dev: 1.4344477441439734
A8 -> lambda: -0.0001447974189266249, mean: -0.029250955634897657, std dev: 1.412976159512927
A11 -> lambda: -0.2945874276798615, mean: -1.3982438609100556, std dev: 2.1845410808406283
A14 -> lambda: 0.3750478969981528, mean: 13.117686422524073, std dev: 8.398731268007579
A15 -> lambda: -0.05243728359787421, mean: 1.4816559045539877, std dev: 3.7042728321208704


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = new_col
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[['A2', 'A3', 'A8', 'A11', 'A14','A15']] = scaler.fit_transform(df[['A2', 'A3', 'A8', 'A11', 'A14','A15']])


In [7]:
pickle_out = open("classifier.pkl","wb")
pickle.dump(cont_cols_stats, pickle_out)
pickle.dump(df, pickle_out)
pickle_out.close()

In [8]:
categorical_col = ["A1","A4","A5","A6","A7", "A9","A10","A12","A13","A16"]

#Imputing

def manhattan(p1,p2):
    p1 = list(p1)
    p2 = list(p2)
    return sum([np.abs(p1[i] - p2[i]) for i in range(len(p1))])

def euclidian(p1,p2):
    p1 = list(p1)
    p2 = list(p2)
    return sum([(p1[i] - p2[i])**2 for i in range(len(p1))])**0.5

for col in categorical_col:
  print(col)
  colcount = df[col].value_counts()
  total_items = sum(colcount)
  colcount = colcount/total_items

  imputing_labels = list(colcount[colcount < 0.05].index) #minority is arbitrarily set at 5%
  for minority in imputing_labels:
    affected_rows = df.loc[df[col] == minority].index

    k = 3

    display(df.loc[affected_rows])

    for nin,target in df.loc[affected_rows].iterrows():
      all_distances = {}
      for rownum,comparison in df.iterrows():
        if rownum in affected_rows:
          continue
        all_distances[rownum] = euclidian(target[cont_cols], comparison[cont_cols])
      topk = sorted(list(all_distances.keys()),key = lambda x: all_distances[x])[:k]
      topk_label = [(df.loc[top])[col] for top in topk]
      final_label = sorted(set(topk_label),key=topk_label.count,reverse=True)[0]
      df.loc[nin, col] = final_label

    df.loc[affected_rows]


A1
A4


Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,A16
317,b,-1.64776,1.983792,l,gg,ff,o,-1.610403,f,f,-0.868749,t,p,1.260822,1.934765,+
321,a,-1.526228,-1.336232,l,gg,cc,ff,1.658339,f,f,-0.868749,t,s,0.817747,-1.061485,+


A5


Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,A16
317,b,-1.64776,1.983792,y,gg,ff,o,-1.610403,f,f,-0.868749,t,p,1.260822,1.934765,+
321,a,-1.526228,-1.336232,y,gg,cc,ff,1.658339,f,f,-0.868749,t,s,0.817747,-1.061485,+


A6


Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,A16
18,b,-0.860825,-1.51953,u,g,d,h,-0.169017,t,f,-0.868749,t,g,-1.746811,-1.061485,+
27,b,1.694771,1.78447,u,g,d,bb,1.943073,t,t,1.52185,t,g,-1.746811,-1.061485,+
61,b,0.283613,1.63333,u,g,d,v,0.821988,t,t,1.38425,f,g,0.639455,1.105662,+
169,b,0.737426,-0.703968,y,p,d,v,0.353595,f,f,-0.868749,t,g,1.210414,-1.061485,+
211,b,-0.505253,0.718859,y,p,d,v,1.240747,t,f,-0.868749,t,s,-0.093089,-1.061485,+
257,b,-1.162215,-2.180467,u,g,d,v,-0.341098,f,f,-0.868749,f,g,0.168664,-1.061485,-
260,b,0.376735,-0.178348,u,g,d,h,1.271673,f,f,-0.868749,t,g,0.79054,-1.061485,-
267,a,0.312566,0.625923,u,g,d,v,0.233268,f,f,-0.868749,f,g,0.720417,-1.061485,-
275,b,-1.426041,0.580081,u,g,d,v,-0.295384,f,f,-0.868749,f,g,0.033309,-1.061485,-
301,b,-1.491705,-1.678533,u,g,d,v,-0.722895,f,f,-0.868749,t,s,0.74884,-1.061485,-


Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,A16
27,b,1.694771,1.78447,u,g,e,bb,1.943073,t,t,1.52185,t,g,-1.746811,-1.061485,+
28,b,1.725558,0.960041,u,g,e,h,1.408794,t,t,1.081335,f,g,-1.746811,-1.061485,+
33,a,0.685154,0.481489,u,g,e,v,1.174521,t,f,-0.868749,t,g,-1.746811,1.416787,+
70,b,0.341075,0.83757,u,g,e,bb,0.390254,t,f,-0.868749,t,s,1.180593,-1.061485,-
105,b,1.628374,1.586987,u,g,e,z,-1.610403,t,t,1.553183,f,g,0.210618,0.760495,-
130,b,2.056045,0.545682,u,g,e,z,1.842479,t,t,0.683609,t,g,-1.746811,-1.061485,+
132,a,1.307633,0.900379,u,g,e,bb,1.357087,t,t,1.282781,f,g,1.053231,1.833821,+
157,a,2.081804,1.551048,u,g,e,z,-1.610403,t,t,1.482451,f,g,-1.746811,1.386995,+
163,b,0.312566,-0.395861,y,p,e,h,-1.372017,t,f,-0.868749,t,g,1.105264,-1.061485,+
218,b,1.592577,1.084387,u,g,e,v,1.557962,t,t,1.233339,f,g,-1.746811,-1.061485,+


Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,A16
89,a,1.382082,-0.506808,u,g,j,j,-1.610403,t,f,-0.868749,t,g,-0.093089,0.418273,-
141,a,1.691803,1.621981,u,g,j,ff,-1.610403,t,t,1.496703,f,g,-1.746811,0.892524,+
155,a,-0.885649,1.267251,y,p,j,j,-1.610403,t,t,1.430376,t,g,-1.746811,-1.061485,+
157,a,2.081804,1.551048,u,g,j,z,-1.610403,t,t,1.482451,f,g,-1.746811,1.386995,+
266,b,-1.475624,-0.655269,y,p,j,dd,-1.610403,f,f,-0.868749,f,g,-0.093089,-1.061485,-
289,a,-0.873203,-0.395861,y,p,j,j,-1.610403,f,f,-0.868749,f,g,0.251214,-1.061485,-
309,a,0.290673,0.021257,y,p,j,j,-1.610403,f,f,-0.868749,f,g,0.251214,0.349835,-
350,a,-0.276697,-0.296764,u,g,j,j,-1.610403,f,f,-0.868749,t,g,0.734693,-0.374607,-
400,b,-1.033895,0.474418,y,p,j,v,-0.64624,f,f,-0.868749,f,g,0.14714,0.8325,-
427,b,0.737426,-0.894617,u,g,j,v,-1.372017,f,f,-0.868749,f,g,0.033309,0.021594,-


Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,A16
6,b,0.411717,-0.755565,u,g,r,h,1.357087,t,f,-0.868749,t,g,0.271038,1.757771,+
369,b,-0.924989,-0.959843,y,p,r,n,-0.094391,f,f,-0.868749,t,g,0.10291,-0.203696,-
552,b,0.537594,1.551048,u,g,r,n,1.224762,t,t,1.38425,t,g,-1.746811,0.766828,+


A7


Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,A16
89,a,1.382082,-0.506808,u,g,ff,j,-1.610403,t,f,-0.868749,t,g,-0.093089,0.418273,-
155,a,-0.885649,1.267251,y,p,q,j,-1.610403,t,t,1.430376,t,g,-1.746811,-1.061485,+
289,a,-0.873203,-0.395861,y,p,c,j,-1.610403,f,f,-0.868749,f,g,0.251214,-1.061485,-
309,a,0.290673,0.021257,y,p,w,j,-1.610403,f,f,-0.868749,f,g,0.251214,0.349835,-
350,a,-0.276697,-0.296764,u,g,i,j,-1.610403,f,f,-0.868749,t,g,0.734693,-0.374607,-
464,a,-0.687223,-0.360981,u,g,k,j,-1.610403,f,t,0.683609,f,g,0.437209,0.568462,-
511,a,1.237477,0.262715,u,g,w,j,-1.610403,t,f,-0.868749,f,g,-0.093089,1.157648,+
621,b,-0.734828,-1.678533,u,g,c,j,0.625836,f,f,-0.868749,t,s,-1.746811,-1.061485,+


Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,A16
105,b,1.628374,1.586987,u,g,q,z,-1.610403,t,t,1.553183,f,g,0.210618,0.760495,-
130,b,2.056045,0.545682,u,g,q,z,1.842479,t,t,0.683609,t,g,-1.746811,-1.061485,+
157,a,2.081804,1.551048,u,g,ff,z,-1.610403,t,t,1.482451,f,g,-1.746811,1.386995,+
221,b,1.988392,1.221044,u,g,w,z,2.145571,t,t,1.322706,t,g,-0.866293,-1.061485,+
250,b,0.916436,1.957003,u,g,q,z,2.145571,t,t,1.430376,f,g,-1.746811,1.199455,+
345,b,1.906281,0.771222,u,g,ff,z,-1.610403,f,f,-0.868749,f,g,-1.746811,0.231113,-
550,b,2.287759,1.999099,u,g,c,z,1.828836,t,t,0.683609,t,g,-1.746811,0.72348,+
584,a,-0.063572,1.551048,y,p,c,z,-1.610403,t,f,-0.868749,f,g,-1.746811,1.619328,+


Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,A16
266,b,-1.475624,-0.655269,y,p,m,dd,-1.610403,f,f,-0.868749,f,g,-0.093089,-1.061485,-
364,b,-0.493477,-0.296764,u,g,k,dd,-0.919972,f,t,0.945865,f,g,0.88384,1.214333,-
371,b,-0.25796,1.396572,u,g,i,dd,-1.610403,f,f,-0.868749,t,g,0.14714,1.184904,-
375,a,-1.020616,-1.18953,y,p,c,dd,0.088222,f,f,-0.868749,f,g,0.676786,-1.061485,-
513,b,-1.118653,1.119096,u,g,aa,dd,-1.610403,t,f,-0.868749,f,g,-1.746811,-1.061485,+
521,a,0.129854,0.510191,u,g,c,dd,0.625836,t,t,1.233339,t,g,-0.099808,1.032595,+


Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,A16
269,b,-1.542642,-1.678533,u,g,q,n,-0.808865,f,f,-0.868749,f,g,0.437209,0.506408,+
369,b,-0.924989,-0.959843,y,p,q,n,-0.094391,f,f,-0.868749,t,g,0.10291,-0.203696,-
552,b,0.537594,1.551048,u,g,c,n,1.224762,t,t,1.38425,t,g,-1.746811,0.766828,+
635,b,-1.507891,-0.137626,u,g,c,n,0.061987,f,t,0.945865,t,g,0.251214,1.063744,-


Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,A16
262,a,1.343429,-0.588227,u,g,i,o,-0.568887,f,f,-0.868749,f,g,-1.746811,0.74372,-
317,b,-1.64776,1.983792,y,p,ff,o,-1.610403,f,f,-0.868749,t,p,1.260822,1.934765,+


A9
A10
A12
A13


Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,A16
317,b,-1.64776,1.983792,y,p,ff,v,-1.610403,f,f,-0.868749,t,p,1.260822,1.934765,+
424,b,0.452507,-0.236617,u,g,c,v,0.353595,f,f,-0.868749,f,p,0.033309,-1.061485,-


A16


In [9]:
# List of columns to apply Label Encoding
label_columns = ['A1', 'A4', 'A5', 'A6', 'A7', 'A9', 'A10', 'A12', 'A13', 'A16']


# Initialize Label Encoder
label_encoder = LabelEncoder()


# Apply Label Encoding to each column
for col in label_columns:
    df[col] = label_encoder.fit_transform(df[col])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = label_encoder.fit_transform(df[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = label_encoder.fit_transform(df[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = label_encoder.fit_transform(df[col])
A value is trying to be set on a copy of a slice from a DataFram

In [10]:
# List of columns to apply One-Hot Encoding, A6 A7 because they have more than 2 classes
one_hot_columns = ['A6', 'A7']

# Apply One-Hot Encoding
df_one_hot = pd.get_dummies(df, columns = one_hot_columns)

df = df_one_hot

In [11]:
seed = 8888

independent = ['A1','A2','A3','A4','A5','A6','A7','A8','A9','A10','A11','A12','A13','A14','A15']
dependent = "A16"

# KNN
x_data = df.loc[:, df.columns != dependent]
y_data = df[dependent]

x_train , x_test, y_train, y_test = train_test_split(x_data,y_data,test_size = 0.2, random_state=seed)

best_model = None
best_prediction = None
best_acc = 0

for k in range(2,21):
  knn = KNeighborsClassifier(n_neighbors=k)
  knn.fit(x_train,y_train)

  y_pred = knn.predict(x_test)

  acc = accuracy_score(y_test, y_pred)
  if acc > best_acc:
    best_model = knn
    best_prediction = y_pred
    best_acc = acc

print(best_model.get_params())
print(confusion_matrix(y_test, best_prediction))
print(accuracy_score(y_test, best_prediction) * 100, "%")
print(classification_report(y_test, best_prediction))

knn_acc = best_acc

{'algorithm': 'auto', 'leaf_size': 30, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': None, 'n_neighbors': 8, 'p': 2, 'weights': 'uniform'}
[[54 10]
 [ 3 64]]
90.07633587786259 %
              precision    recall  f1-score   support

           0       0.95      0.84      0.89        64
           1       0.86      0.96      0.91        67

    accuracy                           0.90       131
   macro avg       0.91      0.90      0.90       131
weighted avg       0.91      0.90      0.90       131



In [12]:
# ANN
x_data = df.loc[:, df.columns != dependent]
y_data = df[dependent]

x_train , x_test, y_train, y_test = train_test_split(x_data,y_data,test_size = 0.2,random_state = seed)

ann = MLPClassifier(hidden_layer_sizes=(30,20,12,7), solver="lbfgs",max_iter=1000,random_state=40)

ann.fit(x_train,y_train)

y_pred = ann.predict(x_test)

print(confusion_matrix(y_test, y_pred))
print(accuracy_score(y_test, y_pred) * 100, "%")
print(classification_report(y_test, y_pred))

ann_acc = accuracy_score(y_test, y_pred)

[[54 10]
 [ 6 61]]
87.78625954198473 %
              precision    recall  f1-score   support

           0       0.90      0.84      0.87        64
           1       0.86      0.91      0.88        67

    accuracy                           0.88       131
   macro avg       0.88      0.88      0.88       131
weighted avg       0.88      0.88      0.88       131



In [13]:
# Logistic Regression
x_data = df.loc[:, df.columns != dependent]
y_data = df[dependent]

x_train , x_test, y_train, y_test = train_test_split(x_data,y_data,test_size = 0.2, random_state = seed)

logreg = LogisticRegression(solver="liblinear", max_iter=1000,random_state=40)
logreg.fit(x_train,y_train)

y_pred = logreg.predict(x_test)

print(confusion_matrix(y_test, y_pred))
print(accuracy_score(y_test, y_pred) * 100, "%")
print(classification_report(y_test, y_pred))

logreg_acc = accuracy_score(y_test, y_pred)

[[58  6]
 [ 5 62]]
91.6030534351145 %
              precision    recall  f1-score   support

           0       0.92      0.91      0.91        64
           1       0.91      0.93      0.92        67

    accuracy                           0.92       131
   macro avg       0.92      0.92      0.92       131
weighted avg       0.92      0.92      0.92       131



In [14]:
# SVM
x_data = df.loc[:, df.columns != dependent]
y_data = df[dependent]

x_train , x_test, y_train, y_test = train_test_split(x_data,y_data,test_size = 0.2, random_state = seed)

svm = SVC(random_state=40)
svm.fit(x_train,y_train)
y_pred = svm.predict(x_test)

print(confusion_matrix(y_test, y_pred))
print(accuracy_score(y_test, y_pred) * 100, "%")
print(classification_report(y_test, y_pred))

svm_acc = accuracy_score(y_test, y_pred)

[[57  7]
 [ 3 64]]
92.36641221374046 %
              precision    recall  f1-score   support

           0       0.95      0.89      0.92        64
           1       0.90      0.96      0.93        67

    accuracy                           0.92       131
   macro avg       0.93      0.92      0.92       131
weighted avg       0.93      0.92      0.92       131



In [15]:
# Ensemble
x_data = df.loc[:, df.columns != dependent]
y_data = df[dependent]

x_train , x_test, y_train, y_test = train_test_split(x_data,y_data,test_size = 0.2, random_state = seed)

models = [ann,knn,svm]
model_names = ["ANN","KNN","SVM"]
model_prediction = pd.DataFrame()
for m, mn in zip(models, model_names):
  model_prediction.insert(0,mn,m.predict(x_test))

y_pred = model_prediction.mode(axis="columns")

print(confusion_matrix(y_test, y_pred))
print(accuracy_score(y_test, y_pred) * 100, "%")
print(classification_report(y_test, y_pred))

ensem_acc = accuracy_score(y_test, y_pred)

[[57  7]
 [ 3 64]]
92.36641221374046 %
              precision    recall  f1-score   support

           0       0.95      0.89      0.92        64
           1       0.90      0.96      0.93        67

    accuracy                           0.92       131
   macro avg       0.93      0.92      0.92       131
weighted avg       0.93      0.92      0.92       131



In [16]:
print("KNN Accuracy : {:.2f}%".format(knn_acc * 100))
print("ANN Accuracy : {:.2f}%".format(ann_acc * 100))
print("Logistic Reg Accuracy : {:.2f}%".format(logreg_acc * 100))
print("Ensemble Accuracy : {:.2f}%".format(ensem_acc * 100))
print("SVM Accuracy : {:.2f}%".format(svm_acc * 100))

KNN Accuracy : 90.08%
ANN Accuracy : 87.79%
Logistic Reg Accuracy : 91.60%
Ensemble Accuracy : 92.37%
SVM Accuracy : 92.37%


In [17]:
pickle_out = open("classifier.pkl","ab")

pickle.dump(models, pickle_out)
pickle_out.close()