### Create a Single-User One-hot-encoded record to feed to the Pickled model

In [11]:
import numpy as np
import pandas as pd
import joblib

In [12]:
#Read the Data file

df = pd.read_csv('Data.csv')
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,cluster_number
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No,84
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,No,No,No,One year,No,Mailed check,56.95,1889.5,No,81
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,56
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No,18
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes,43


In [13]:
df.columns

Index(['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents',
       'tenure', 'PhoneService', 'MultipleLines', 'InternetService',
       'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
       'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
       'PaymentMethod', 'MonthlyCharges', 'TotalCharges', 'Churn',
       'cluster_number'],
      dtype='object')

In [28]:
#Select the index for the singular record

s_u1 = df[df.index==13]
s_u = pd.DataFrame(s_u1, columns=df.columns)
s_u

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,cluster_number
13,0280-XJGEX,Male,0,No,No,49,Yes,Yes,Fiber optic,No,...,No,Yes,Yes,Month-to-month,Yes,Bank transfer (automatic),103.7,5036.3,Yes,1


In [29]:
#Drop the predictable variables

s_u.drop(['Churn', 'cluster_number'],axis=1,inplace=True)
s_u

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges
13,0280-XJGEX,Male,0,No,No,49,Yes,Yes,Fiber optic,No,Yes,Yes,No,Yes,Yes,Month-to-month,Yes,Bank transfer (automatic),103.7,5036.3


In [30]:
#Convert the single record to a csv file and save

s_u.to_csv('Singleuser.csv', index=False)

In [31]:
df1 = pd.read_csv('Singleuser.csv')
df1

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges
0,0280-XJGEX,Male,0,No,No,49,Yes,Yes,Fiber optic,No,Yes,Yes,No,Yes,Yes,Month-to-month,Yes,Bank transfer (automatic),103.7,5036.3


In [32]:
#Import Required Libraries

import numpy as np
import pandas as pd
import pickle
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import joblib

def read_data(data_path):
    df1 = pd.read_csv(data_path)
    df1.replace(' ', '0', inplace=True)
    return df1

#Read the Data
df_original = read_data('Data.csv')

#Read the single user record to encode
df_sear = pd.read_csv('Singleuser.csv')

#Add dummy values to merge with main data file for automatic encoding
df_sear['Churn'] = 1
df_sear['cluster_number'] = 5

#Add a dummy Customer-id for proper concatenation
if ('customerID' in df_sear.columns):
    l = [df_sear.iloc[0:1], df_original.iloc[0:1000]]

else:
    searched = df_sear
    searched['customerID'] = 'Dummy'
    searched = searched[df_original.columns]
    l = [searched, df_original.iloc[0:1000]]


#Concatenate the single-data record with 1000 data samples for one-hot encoding
df_for_oht = pd.concat(l)

#This function will return the encoded data that we're supposed to feed into the pickled model
def convert_data(df_churn):
    df_churn = df_churn.reset_index(drop=True)
    empty_cols = ['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents',
                  'tenure', 'PhoneService', 'MultipleLines', 'InternetService',
                  'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
                  'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
                  'PaymentMethod', 'MonthlyCharges', 'TotalCharges', 'Churn']

    for i in empty_cols:
        df_churn[i] = df_churn[i].replace(" ", np.nan)

    df_churn.drop(['customerID', 'cluster_number'], axis=1, inplace=True)
    df_churn = df_churn.dropna()
    binary_cols = ['Partner', 'Dependents', 'PhoneService', 'PaperlessBilling']

    #Binary Encoding
    for i in binary_cols:
        df_churn[i] = df_churn[i].replace({"Yes": 1, "No": 0})

    # Encoding column 'gender'
    df_churn['gender'] = df_churn['gender'].replace({"Male": 1, "Female": 0})
    df_churn['Churn'] = df_churn['Churn'].replace({"Yes": 1, "No": 0})
    df_churn['PaymentMethod'] = df_churn['PaymentMethod'].replace({"Yes": 1, "No": 0})

    category_cols = ['PaymentMethod', 'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup',
                     'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract']

    #Categorical Encoding
    for cc in category_cols:
        dummies = pd.get_dummies(df_churn[cc], drop_first=False)
        dummies = dummies.add_prefix("{}#".format(cc))
        df_churn.drop(cc, axis=1, inplace=True)
        df_churn = df_churn.join(dummies)

    return df_churn

#Get the encoded data of 1001 samples
df4 = convert_data(df_for_oht)

l = list(df4.columns)
l = [l[-1]] + l[:-1]
df4 = df4[l]

#Separate the now encoded record, remove dummy 'Churn' value
to_pred = df4.iloc[0,:]
to_pred.drop(['Churn'], inplace=True)
to_pred

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Contract#Two year                             0.0
gender                                        1.0
SeniorCitizen                                 0.0
Partner                                       0.0
Dependents                                    0.0
tenure                                       49.0
PhoneService                                  1.0
PaperlessBilling                              1.0
MonthlyCharges                              103.7
TotalCharges                               5036.3
PaymentMethod#Bank transfer (automatic)       1.0
PaymentMethod#Credit card (automatic)         0.0
PaymentMethod#Electronic check                0.0
PaymentMethod#Mailed check                    0.0
MultipleLines#No                              0.0
MultipleLines#No phone service                0.0
MultipleLines#Yes                             1.0
InternetService#DSL                           0.0
InternetService#Fiber optic                   1.0
InternetService#No                            0.0


In [34]:
#Render the encoded record as Json file in the given format to feed as the input to the 
#KFServing Inferenceservice

import json

to_pred_json = {
    "instances": [
        list(to_pred)
    ]
}

to_pred_json = json.dumps(to_pred_json)
with open("./single_user_4.json", "w") as outfile: 
    outfile.write(to_pred_json) 
    
