In [8]:
import tensorflow as tf
from tensorflow.keras.models import load_model
import pickle
import pandas as pd
import numpy as np



In [9]:
# Load the artifacts saved by the training run: the trained Keras model and
# the pickled gender label encoder, geography one-hot encoder and scaler.


def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # NOTE(review): unpickling can execute arbitrary code, so only load
    # .pkl files that come from a trusted source.
    with open(path, "rb") as handle:
        return pickle.load(handle)


model = load_model("model.h5")

label_encoder_gender = _load_pickle("label_encoder_gender.pkl")
onehot_encoder_geo = _load_pickle("onehot_encoder_geo.pkl")
scaler = _load_pickle("scaler.pkl")




In [30]:
# Single customer record to score. Before prediction, the categorical text
# columns (Geography and Gender) have to be converted to numerical values.

input_data = {
    "CreditScore": 600,
    "Geography": "France",
    "Gender": "Male",
    "Age": 40,
    "Tenure": 3,
    "Balance": 60000,
    "NumOfProducts": 2,
    "HasCrCard": 1,
    "IsActiveMember": 1,
    "EstimatedSalary": 50000,
}

# Gender is not converted in this cell. Because the column only has two
# values, a manual mapping such as {"Female": 0, "Male": 1} would be an
# alternative to using an encoder for it.

# One-hot encode the Geography value. The encoder is given a 2-D input
# (one row, one column); .toarray() converts its (presumably sparse) result
# into a dense array, and the encoder supplies the output column names.
cols = onehot_encoder_geo.get_feature_names_out(["Geography"])
input_geo_encoded = onehot_encoder_geo.transform(
    [[input_data["Geography"]]]
).toarray()

input_df = pd.DataFrame(data=input_geo_encoded, columns=cols)
input_df




Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0


In [31]:
# The record is still a plain dictionary, while the training pipeline worked
# on DataFrames, so it is converted into a one-row DataFrame first.
#
# A dict whose values are lists can be passed to pd.DataFrame directly; a
# dict of single values has to be wrapped in a list: pd.DataFrame([record]).

# Swap the raw Geography text column for its one-hot encoded columns.
input_data_df = pd.concat(
    [pd.DataFrame([input_data]).drop(columns="Geography"), input_df],
    axis=1,
)

# Replace the Gender text with the label encoder's numeric code; the column
# keeps its original position in the frame.
input_data_df = input_data_df.assign(
    Gender=label_encoder_gender.transform(input_data_df["Gender"])
)
input_data_df

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
0,600,1,40,3,60000,2,1,1,50000,1.0,0.0,0.0


In [33]:
# Final transformation -- scaling.
#
# The scaler must receive the columns in the same order it saw when it was
# fitted. If the scaler recorded the training column names (scikit-learn
# exposes them as `feature_names_in_` when it was fitted on a DataFrame),
# select the input columns by name in that order so the result does not
# depend on the key order of the input dictionary. A column missing from the
# input then raises a KeyError. If no names were recorded, the frame is
# passed through unchanged.
expected_columns = getattr(scaler, "feature_names_in_", None)
scaler_input = (
    input_data_df
    if expected_columns is None
    else input_data_df[list(expected_columns)]
)

input_scaled = scaler.transform(scaler_input)
input_scaled


array([[-0.53598516,  0.91324755,  0.10479359, -0.69539349, -0.25781119,
         0.80843615,  0.64920267,  0.97481699, -0.87683221,  1.00150113,
        -0.57946723, -0.57638802]])

scaler is a StandardScaler object that was already fitted on the training data.

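.transform() applies the same scaling to your new input row (input_data_df), using the mean and standard deviation learned from the training data: by default, scaled value = (value − mean) / std for each feature.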

It outputs the standardized values, where each feature is roughly on the same scale.

If your input value is equal to the mean, the scaled value = 0.

If it’s above the mean, the scaled value is positive (the number of standard deviations above the mean).

If it’s below the mean, the scaled value is negative (the number of standard deviations below the mean).


In [36]:
# Actual prediction.
# model.predict returns a nested array (printed as [[p]]); the single value
# inside it is read out and treated as the churn probability, which is then
# compared against a 0.5 cut-off.

prediction = model.predict(input_scaled)
print(prediction)

probab = prediction[0][0]
print(probab)

churn_message = (
    "the customer is likely to churn"
    if probab > 0.5
    else "the customer is not likely to churn"
)
print(churn_message)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[[0.02061399]]
0.020613993
the customer is not likely to churn
