## Let's predict loyal customers on new data

In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the new customer data from the Excel workbook into a DataFrame.
NEW_DATA_PATH = "data/new_Data.xlsx"
dataset = pd.read_excel(NEW_DATA_PATH)

In [3]:
# Preview the first five rows to check that the file loaded as expected.
dataset.head()

Unnamed: 0,ID,DemAffl,DemAge,DemClusterGroup,DemGender,DemReg,DemTVReg,LoyalClass,LoyalSpend,LoyalTime
0,140,10.0,76.0,C,U,Midlands,Wales & West,Gold,16000.0,4.0
1,620,4.0,49.0,D,U,Midlands,Wales & West,Gold,6000.0,5.0
2,868,5.0,70.0,D,F,Midlands,Wales & West,Silver,0.02,8.0
3,1120,10.0,65.0,F,M,Midlands,Midlands,Tin,0.01,7.0
4,2313,11.0,68.0,A,F,Midlands,Midlands,Tin,0.01,8.0


Data preparation

In [4]:
# Count the missing values in each column before any imputation.
dataset.isna().sum()

ID                   0
DemAffl             48
DemAge              67
DemClusterGroup     28
DemGender          114
DemReg              18
DemTVReg            18
LoyalClass           0
LoyalSpend           0
LoyalTime           15
dtype: int64

In [5]:
# Impute missing values column by column:
#   - the six Dem* columns are filled with their most frequent value (mode),
#   - LoyalTime is filled with its mean.
# All fill values are computed up front from the un-imputed data; the columns are
# independent of each other, so this gives the same result as filling one at a time.
mode_filled_columns = [
    'DemAffl', 'DemAge', 'DemClusterGroup',
    'DemGender', 'DemReg', 'DemTVReg',
]
fill_values = {column: dataset[column].mode()[0] for column in mode_filled_columns}
fill_values['LoyalTime'] = dataset['LoyalTime'].mean()

for column, replacement in fill_values.items():
    dataset[column] = dataset[column].fillna(replacement)

In [6]:
# Re-count missing values after imputation; every column should now report 0.
dataset.isna().sum()

ID                 0
DemAffl            0
DemAge             0
DemClusterGroup    0
DemGender          0
DemReg             0
DemTVReg           0
LoyalClass         0
LoyalSpend         0
LoyalTime          0
dtype: int64

In [7]:
# Snapshot the imputed frame before the categorical columns are replaced by integer
# codes, so the original labels are still available for the final result table.
orgdata = dataset.copy(deep=True)

Converting categories to numeric

In [8]:
# Converting the categorical columns to numeric codes.
from sklearn.preprocessing import LabelEncoder

categorical_columns = ['DemClusterGroup', 'DemGender', 'DemReg', 'DemTVReg', 'LoyalClass']

# NOTE(review): each encoder is fitted on this new file, so the integer assigned to a
# label depends on which labels are present here. The codes only agree with the ones the
# saved model saw during training if both files contain the same set of labels per
# column — confirm this, or reuse the encoders that were fitted at training time.
encoders = {}
for column in categorical_columns:
    number = LabelEncoder()

    # Encode from the untouched snapshot (orgdata) rather than from dataset itself, so
    # re-running this cell does not re-encode the already-encoded integers as strings.
    dataset[column] = number.fit_transform(orgdata[column].astype('str'))

    # Keep the fitted encoder so the codes can be mapped back to labels later.
    encoders[column] = number

    # Show the label -> code mapping that was applied to this column.
    integer_mapping = {label: code for code, label in enumerate(number.classes_)}
    print(integer_mapping)

{'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'U': 6}
{'F': 0, 'M': 1, 'U': 2}
{'Midlands': 0, 'North': 1, 'Scottish': 2, 'South East': 3, 'South West': 4}
{'Border': 0, 'C Scotland': 1, 'East': 2, 'London': 3, 'Midlands': 4, 'N East': 5, 'N Scot': 6, 'N West': 7, 'S & S East': 8, 'S West': 9, 'Ulster': 10, 'Wales & West': 11, 'Yorkshire': 12}
{'Gold': 0, 'Platinum': 1, 'Silver': 2, 'Tin': 3}


In [9]:
# Preview the first rows again; the categorical columns now hold integer codes.
dataset.head()

Unnamed: 0,ID,DemAffl,DemAge,DemClusterGroup,DemGender,DemReg,DemTVReg,LoyalClass,LoyalSpend,LoyalTime
0,140,10.0,76.0,2,2,0,11,0,16000.0,4.0
1,620,4.0,49.0,3,2,0,11,0,6000.0,5.0
2,868,5.0,70.0,3,0,0,11,2,0.02,8.0
3,1120,10.0,65.0,5,1,0,4,3,0.01,7.0
4,2313,11.0,68.0,0,0,0,4,3,0.01,8.0


Predictions

In [10]:
# Select the model inputs by name instead of by position, so that a reordered or
# extended spreadsheet cannot silently feed the wrong columns to the model.
# These are the nine columns that follow ID, i.e. exactly what iloc[:, 1:10] selected.
# NOTE(review): the order is assumed to match the feature order used when the model
# was trained — confirm against the training notebook.
FEATURE_COLUMNS = [
    'DemAffl', 'DemAge', 'DemClusterGroup', 'DemGender', 'DemReg',
    'DemTVReg', 'LoyalClass', 'LoyalSpend', 'LoyalTime',
]
X_fresh = dataset[FEATURE_COLUMNS].values

In [11]:
# Load the persisted model object from disk.
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary code —
# only load model files that come from a trusted source.
import joblib
classifier = joblib.load('model/Classify_LoyalCustomers')

In [12]:
# Predict a class label for every row of X_fresh and print the resulting label array.
y_pred = classifier.predict(X_fresh)
print(y_pred)

[0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 0 0 0
 1 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 1 1 0 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0
 1 0 0 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1
 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0
 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
 1 0 0 0 0 0 1 0 0 0 0 0 

In [13]:
# Class probabilities for every row of X_fresh (one column per class); the bare
# expression on the last line displays the array.
predictions = classifier.predict_proba(X_fresh)
predictions

array([[0.97530119, 0.02469881],
       [0.96877803, 0.03122197],
       [0.94549187, 0.05450813],
       ...,
       [0.96800247, 0.03199753],
       [0.7708916 , 0.2291084 ],
       [0.49037998, 0.50962002]])

In [14]:
# Wrap the probability matrix in a DataFrame with one named column per class.
# Reuse orgdata's index so that the column-wise concat with orgdata pairs each
# probability row with its own record even if that index is not the default 0..n-1;
# a row-count mismatch then raises immediately instead of producing NaN-padded rows.
df_prediction_prob = pd.DataFrame(
    predictions,
    columns=['prob_0', 'prob_1'],
    index=orgdata.index,
)

In [15]:
# Attach the predicted class probabilities to the original, human-readable records.
result = pd.concat([orgdata, df_prediction_prob], axis=1)

# to_excel does not create missing folders, so make sure the output folder exists
# before writing; this is a no-op when it is already there.
Path("result").mkdir(parents=True, exist_ok=True)
result.to_excel("result/Pred_New_Data.xlsx")

# Preview the first rows of the saved table.
result.head()

Unnamed: 0,ID,DemAffl,DemAge,DemClusterGroup,DemGender,DemReg,DemTVReg,LoyalClass,LoyalSpend,LoyalTime,prob_0,prob_1
0,140,10.0,76.0,C,U,Midlands,Wales & West,Gold,16000.0,4.0,0.975301,0.024699
1,620,4.0,49.0,D,U,Midlands,Wales & West,Gold,6000.0,5.0,0.968778,0.031222
2,868,5.0,70.0,D,F,Midlands,Wales & West,Silver,0.02,8.0,0.945492,0.054508
3,1120,10.0,65.0,F,M,Midlands,Midlands,Tin,0.01,7.0,0.893317,0.106683
4,2313,11.0,68.0,A,F,Midlands,Midlands,Tin,0.01,8.0,0.802096,0.197904
