In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, roc_curve
from sklearn.metrics import precision_recall_curve, average_precision_score
import pickle

In [3]:
# Read the test data; fields in this file are separated by semicolons.
df = pd.read_csv("data/test.csv", delimiter=";")

In [4]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,30,unemployed,married,primary,no,1787,no,no,cellular,19,oct,79,1,-1,0,unknown,no
1,33,services,married,secondary,no,4789,yes,yes,cellular,11,may,220,1,339,4,failure,no
2,35,management,single,tertiary,no,1350,yes,no,cellular,16,apr,185,1,330,1,failure,no
3,30,management,married,tertiary,no,1476,yes,yes,unknown,3,jun,199,4,-1,0,unknown,no
4,59,blue-collar,married,secondary,no,0,yes,no,unknown,5,may,226,1,-1,0,unknown,no


In [5]:
def label_encoding(df):
    """Replace the string categories of selected columns with integer codes.

    The columns ``job``, ``marital``, ``education``, ``default``, ``housing``,
    ``loan`` and ``poutcome`` are encoded with the fixed per-column mappings
    defined below. Values that do not appear in a column's mapping are left
    unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the categorical columns as strings.

    Returns
    -------
    pandas.DataFrame
        A new frame with the categories replaced by their integer codes.
        The frame passed in is not modified.
    """
    # NOTE(review): these codes presumably have to match the encoding used
    # when the model was trained - confirm against the training code.
    replace_dict = {
        'job': {
            'management': 0, 'technician': 1, 'entrepreneur': 2,
            'blue-collar': 3, 'unknown': 4, 'retired': 5, 'admin.': 6,
            'services': 7, 'self-employed': 8, 'unemployed': 9,
            'housemaid': 10, 'student': 11
        },
        'marital': {'married': 0, 'single': 1, 'divorced': 2},
        'education': {'tertiary': 0, 'secondary': 1, 'unknown': 2, 'primary': 3},
        'default': {'no': 0, 'yes': 1},
        'housing': {'yes': 1, 'no': 0},
        'loan': {'no': 0, 'yes': 1},
        'poutcome': {'unknown': 0, 'failure': 1, 'other': 2, 'success': 3}
    }
    # Non-inplace replace builds a new frame, so the caller's data (which may
    # be a slice of a larger frame) is never mutated as a side effect.
    return df.replace(replace_dict)

In [6]:
# Features are every column except the last one, minus the columns listed
# below. drop() without inplace returns a new frame, so `df` is not modified.
df_features = df.iloc[:, :-1].drop(
    columns=['contact', 'day', 'month', 'duration', 'previous']
)

# The target is the last column, with yes/no encoded as 1/0. The non-inplace
# replace returns a new Series instead of writing through a slice of `df`.
df_target = df.iloc[:, -1].replace({'yes': 1, 'no': 0})

# Integer-encode the remaining categorical feature columns.
df_features = label_encoding(df_features)

In [7]:
# NOTE: unpickling can execute arbitrary code contained in the file, so only
# load model files that come from a trusted source.
# The context manager closes the file handle as soon as the model is loaded.
with open('rf_trained_model.pkl', 'rb') as model_file:
    pickled_model = pickle.load(model_file)

In [9]:
# Ground-truth labels converted to an int64 NumPy array.
y_true = np.array(df_target, dtype='int64')
# Predictions of the loaded model for the encoded feature frame.
y_pred = pickled_model.predict(df_features)

In [10]:
# Confusion matrix of true labels against predictions.
cm = confusion_matrix(y_true, y_pred)
print(cm)

# Per-class precision / recall / F1 summary.
report = classification_report(y_true, y_pred)
print(report)

[[2888 1112]
 [ 258  263]]
              precision    recall  f1-score   support

           0       0.92      0.72      0.81      4000
           1       0.19      0.50      0.28       521

    accuracy                           0.70      4521
   macro avg       0.55      0.61      0.54      4521
weighted avg       0.83      0.70      0.75      4521

