In [1]:
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from imblearn.over_sampling import SMOTE
from sklearn.metrics import confusion_matrix, classification_report
import pandas as pd
import numpy as np
import sys
import traceback
import os
import pickle

Using TensorFlow backend.


In [2]:
# Path of the pickled estimator, relative to the notebook's working directory.
model_file_path = '../trained-models/Logistic_solver_lbfgs_penalty_l2_C_70548'
# NOTE(review): pickle.load can execute arbitrary code stored in the file, so
# only model files from a trusted source should be loaded here.
with open(model_file_path, 'rb') as f:
    model = pickle.load(f)

In [3]:
model

LogisticRegression(C=70548.02311, class_weight=None, dual=False,
                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,
                   max_iter=1000, multi_class='auto', n_jobs=-1, penalty='l2',
                   random_state=42, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [4]:
# Load the evaluation data; the path is relative to the notebook's working directory.
df = pd.read_csv('../datasets-provided/test.csv')
# Show the first rows as a quick check of the loaded columns.
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,target
0,41,management,married,tertiary,no,5883,no,no,cellular,20,nov,182,2,-1,0,unknown,0
1,47,technician,married,secondary,no,1233,yes,no,unknown,11,sep,91,1,100,2,other,1
2,51,blue-collar,married,primary,no,5050,no,yes,unknown,16,jun,75,7,-1,0,unknown,0
3,54,retired,married,secondary,no,3671,no,no,unknown,11,jun,67,3,-1,0,unknown,0
4,31,blue-collar,married,secondary,no,1243,yes,no,unknown,21,may,32,1,-1,0,unknown,0


In [5]:
class ErrorHandler(object):
    """Formats error messages as ``"<function name>:<message>"``."""

    def handleErr(self, err):
        """Prefix ``err`` with the name of the function handling the exception.

        The name is read from the first frame of the traceback returned by
        ``sys.exc_info()``, i.e. the function whose ``try`` block caught the
        exception that is currently being handled.

        Args:
            err: Message to report. It is converted with ``str`` so that an
                exception object can be passed directly.

        Returns:
            str: ``"<function name>:<message>"``. When no exception is being
            handled the name is the placeholder ``"<unknown>"``.
        """
        tb = sys.exc_info()[-1]
        # Outside an ``except`` block there is no traceback to inspect; fall
        # back to a placeholder instead of failing with IndexError.
        frames = traceback.extract_tb(tb, 1) if tb is not None else []
        functionName = frames[0].name if frames else "<unknown>"
        return functionName + ":" + str(err)


class DataFrameHandler():
    """Preprocessing helpers built around one pandas DataFrame.

    Every public method works on a copy of its input and returns the
    transformed result; the frame passed in is not modified. When a method is
    called without ``df`` it falls back to the frame given to ``__init__``.

    Errors are printed as ``"<method>:<message>"`` through ``ErrorHandler`` and
    then re-raised, so a failed step never hands back a partially transformed
    frame.

    Attributes set by ``__init__``:
        data_frame_original: the frame passed to the constructor.
        data_cols / data_shape: its ``columns`` and ``shape``.
        categorical_cols / numerical_cols / target_col: column names the
            transformations operate on.
        scaler: ``MinMaxScaler`` used by ``get_scaled_data``.
        labelEncoder: most recently fitted ``LabelEncoder`` (kept for
            backward compatibility).
        label_encoders: dict mapping a categorical column name to the
            ``LabelEncoder`` fitted on it.
    """

    def __init__(self, df, parent=None):
        """Store ``df`` and the column configuration.

        Args:
            df: DataFrame used as the default input of every method.
            parent: Not used; accepted so existing callers keep working.

        Raises:
            Exception: whatever reading ``df.columns`` / ``df.shape`` or
                building the transformers raises, after it has been printed.
        """
        # Created before the try block so the except handler can always use it.
        self.errObj = ErrorHandler()
        self.categorical_cols = ['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month',
                                 'poutcome']
        self.numerical_cols = ['age', 'balance', 'day', 'duration', 'campaign', 'pdays', 'previous']
        self.target_col = 'target'
        # Filled by get_label_encoded_data(fit=True); one encoder per column.
        self.label_encoders = {}
        try:
            self.data_frame_original = df
            self.data_cols = self.data_frame_original.columns
            self.data_shape = self.data_frame_original.shape
            self.scaler = MinMaxScaler()
            self.labelEncoder = LabelEncoder()
        except Exception as exp:
            self._report(exp)
            raise

    def _report(self, exp):
        """Print ``"<function>:<message>"`` for the exception being handled."""
        print(str(self.errObj.handleErr(str(exp))))

    def _frame_or_default(self, df):
        """Return ``df``, or the constructor's frame when ``df`` is None."""
        return self.data_frame_original if df is None else df

    def get_dummies_data(self, df=None):
        """One-hot encode the categorical columns.

        Args:
            df: Frame to encode; defaults to the constructor's frame.

        Returns:
            A new DataFrame holding the non-categorical columns in their
            original order followed by the ``<column>_<value>`` indicator
            columns, grouped in the order of ``categorical_cols``.

        Raises:
            Exception: the underlying pandas error (e.g. ``KeyError`` for a
                missing column), after it has been printed.
        """
        df = self._frame_or_default(df)
        try:
            # get_dummies drops the encoded columns and appends the indicator
            # columns itself, so no manual join / column filtering is needed.
            return pd.get_dummies(df, columns=list(self.categorical_cols))
        except Exception as exp:
            self._report(exp)
            raise

    def get_label_encoded_data(self, df=None, fit=True):
        """Replace each categorical column by integer codes.

        Args:
            df: Frame to encode; defaults to the constructor's frame.
            fit: When True (default) a new ``LabelEncoder`` is fitted on every
                categorical column and remembered in ``label_encoders``. When
                False the encoders remembered from an earlier ``fit=True``
                call are reused, so the value-to-code mapping stays the same
                across frames.

        Returns:
            A copy of the frame with the categorical columns encoded.

        Raises:
            ValueError: ``fit=False`` was requested for a column that has no
                fitted encoder yet.
            Exception: any error raised by pandas / scikit-learn, after it has
                been printed.
        """
        df = self._frame_or_default(df)
        try:
            label_encoded_dataframe = df.copy()
            for col in self.categorical_cols:
                if fit:
                    encoder = LabelEncoder()
                    label_encoded_dataframe[col] = encoder.fit_transform(label_encoded_dataframe[col])
                    self.label_encoders[col] = encoder
                    # Legacy attribute: keeps pointing at the last fitted encoder.
                    self.labelEncoder = encoder
                else:
                    if col not in self.label_encoders:
                        raise ValueError(
                            "no fitted encoder for column %r; call with fit=True first" % col)
                    label_encoded_dataframe[col] = self.label_encoders[col].transform(
                        label_encoded_dataframe[col])
        except Exception as exp:
            self._report(exp)
            raise
        return label_encoded_dataframe

    def get_scaled_data(self, df=None, fit=True):
        """Min-max scale the numerical columns.

        Args:
            df: Frame to scale; defaults to the constructor's frame.
            fit: When True (default) ``self.scaler`` is fitted on this frame
                before transforming it. When False the already fitted scaler
                is applied unchanged.

        Returns:
            A copy of the frame with the numerical columns scaled.

        Raises:
            Exception: any error raised by pandas / scikit-learn, after it has
                been printed.
        """
        df = self._frame_or_default(df)
        try:
            scaled_dataframe = df.copy()
            numeric_values = scaled_dataframe[self.numerical_cols]
            if fit:
                scaled_values = self.scaler.fit_transform(numeric_values)
            else:
                scaled_values = self.scaler.transform(numeric_values)
            scaled_dataframe[self.numerical_cols] = scaled_values
        except Exception as exp:
            self._report(exp)
            raise
        return scaled_dataframe

    def split_attribute_and_target(self, df=None):
        """Separate the target column from the remaining columns.

        Args:
            df: Frame to split; defaults to the constructor's frame.

        Returns:
            dict with ``'attributes'`` (the frame without the target column)
            and ``'target'`` (the target column as a Series).

        Raises:
            KeyError: the target column is missing (printed first).
        """
        df = self._frame_or_default(df)
        try:
            target = df[self.target_col]
            attribute_set = df.drop(self.target_col, axis=1)
        except Exception as exp:
            self._report(exp)
            raise
        return {'attributes': attribute_set, 'target': target}

    def get_binned_data(self, df=None, bins_per_col=4):
        """Replace each numerical column by one-hot equal-width bins.

        For every numerical column, ``bins_per_col`` equal-width intervals are
        laid out between the column's minimum and maximum, each value is
        assigned to its interval, and the column is replaced by one indicator
        column per interval (named ``<column>_<interval>``).

        Args:
            df: Frame to bin; defaults to the constructor's frame.
            bins_per_col: Number of intervals per numerical column.

        Returns:
            A copy of the frame with the numerical columns replaced by their
            indicator columns (appended at the end, column by column).

        Raises:
            Exception: any error raised by pandas / numpy (for instance when
                the bin edges are rejected by ``pd.cut``), after it has been
                printed.
        """
        df = self._frame_or_default(df)
        try:
            binned_dataframe = df.copy()
            for col in self.numerical_cols:
                bins = np.linspace(binned_dataframe[col].min(), binned_dataframe[col].max(), bins_per_col + 1)
                # include_lowest=True keeps the column minimum inside the first bin.
                intervals = pd.cut(binned_dataframe[col], bins, precision=1, include_lowest=True,
                                   right=True)
                cat_list = pd.get_dummies(intervals, prefix=col)
                binned_dataframe = binned_dataframe.drop(col, axis=1).join(cat_list)
        except Exception as exp:
            self._report(exp)
            raise
        return binned_dataframe


In [6]:
# Preprocess the loaded frame: label-encode the categorical columns, min-max
# scale the numerical columns, then separate the features from the target.
# NOTE(review): both steps call fit_transform on this frame, so the encoders and
# the scaler are fitted on the test file itself. Presumably the model was
# trained with transformers fitted on the training data — confirm the two
# mappings agree, otherwise the features seen here differ from training.
df_handler = DataFrameHandler(df)
label_df = df_handler.get_label_encoded_data()
scaled_df = df_handler.get_scaled_data(df=label_df)
attribute_target_split_result = df_handler.split_attribute_and_target(df=scaled_df)
# X holds every column except 'target'; y is the 'target' column.
X = attribute_target_split_result['attributes']
y = attribute_target_split_result['target']

In [7]:
X

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome
0,0.323529,4,1,2,0,0.097922,0,0,0,0.633333,9,0.058609,0.032258,0.00000,0.000000,3
1,0.411765,9,1,1,0,0.033691,1,0,2,0.333333,11,0.028477,0.000000,0.14963,0.166667,1
2,0.470588,1,1,0,0,0.086416,0,1,2,0.500000,6,0.023179,0.193548,0.00000,0.000000,3
3,0.514706,5,1,1,0,0.067367,0,0,2,0.333333,6,0.020530,0.064516,0.00000,0.000000,3
4,0.176471,1,1,1,0,0.033829,1,0,2,0.666667,8,0.008940,0.000000,0.00000,0.000000,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
900,0.176471,10,1,2,0,0.024822,0,0,0,0.666667,9,0.051656,0.032258,0.00000,0.000000,3
901,0.220588,4,1,2,0,0.026287,0,0,0,0.133333,1,0.053311,0.000000,0.00000,0.000000,3
902,0.176471,9,2,2,0,0.022516,0,0,0,0.666667,1,0.029139,0.032258,0.00000,0.000000,3
903,0.308824,4,2,2,0,0.016659,0,0,0,0.833333,1,0.017550,0.193548,0.00000,0.000000,3


In [8]:
y

0      0
1      1
2      0
3      0
4      0
      ..
900    0
901    0
902    0
903    0
904    0
Name: target, Length: 905, dtype: int64

In [9]:
# Predict a class label for every preprocessed row with the unpickled model.
y_pred = model.predict(X)

In [10]:
y_pred

array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
       1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1,
       0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
       0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1,

In [11]:
# y is passed as the true labels and y_pred as the predictions, so rows of the
# printed matrix are true classes and columns are predicted classes.
print(confusion_matrix(y, y_pred))

[[699 102]
 [ 34  70]]


In [12]:
# Per-class precision / recall / F1 and support; print() is needed because the
# report is returned as a multi-line string.
print(classification_report(y, y_pred))

              precision    recall  f1-score   support

           0       0.95      0.87      0.91       801
           1       0.41      0.67      0.51       104

    accuracy                           0.85       905
   macro avg       0.68      0.77      0.71       905
weighted avg       0.89      0.85      0.86       905

