In [26]:
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
import pandas as pd
from tensorflow import keras
import matplotlib.pyplot as plt

In [27]:
df = pd.read_csv('./Datasets/insurance_data.csv')
df.head()

Unnamed: 0,age,affordibility,bought_insurance
0,22,1,0
1,25,0,0
2,47,1,1
3,52,0,0
4,46,1,1


In [28]:
X = df[['age', 'affordibility']]
y = df['bought_insurance']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=13)
print(len(X_test))

6


In [29]:
X_train_scaled = X_train.copy()
X_train_scaled['age'] = X_train_scaled['age']/100

X_test_scaled = X_test.copy()
X_test_scaled['age'] = X_test_scaled['age']/100

In [30]:
X_train_scaled

Unnamed: 0,age,affordibility
8,0.62,1
24,0.5,1
19,0.18,1
0,0.22,1
27,0.46,1
4,0.46,1
11,0.28,1
9,0.61,1
25,0.54,1
13,0.29,0


In [None]:
model = keras.Sequential([
    keras.layers.Dense(1, input_shape=(2,), activation='sigmoid', kernel_initializer='ones', bias_initializer='zeros')
])
# the weights are initialized = 1 & the bias is inititalized = 0

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.fit(X_train_scaled, y_train, epochs=2000)

Epoch 1/2000
Epoch 2/2000
Epoch 3/2000
Epoch 4/2000
Epoch 5/2000
Epoch 6/2000
Epoch 7/2000
Epoch 8/2000
Epoch 9/2000
Epoch 10/2000
Epoch 11/2000
Epoch 12/2000
Epoch 13/2000
Epoch 14/2000
Epoch 15/2000
Epoch 16/2000
Epoch 17/2000
Epoch 18/2000
Epoch 19/2000
Epoch 20/2000
Epoch 21/2000
Epoch 22/2000
Epoch 23/2000
Epoch 24/2000
Epoch 25/2000
Epoch 26/2000
Epoch 27/2000
Epoch 28/2000
Epoch 29/2000
Epoch 30/2000
Epoch 31/2000
Epoch 32/2000
Epoch 33/2000
Epoch 34/2000
Epoch 35/2000
Epoch 36/2000
Epoch 37/2000
Epoch 38/2000
Epoch 39/2000
Epoch 40/2000
Epoch 41/2000
Epoch 42/2000
Epoch 43/2000
Epoch 44/2000
Epoch 45/2000
Epoch 46/2000
Epoch 47/2000
Epoch 48/2000
Epoch 49/2000
Epoch 50/2000
Epoch 51/2000
Epoch 52/2000
Epoch 53/2000
Epoch 54/2000
Epoch 55/2000
Epoch 56/2000
Epoch 57/2000
Epoch 58/2000
Epoch 59/2000
Epoch 60/2000
Epoch 61/2000
Epoch 62/2000
Epoch 63/2000
Epoch 64/2000
Epoch 65/2000
Epoch 66/2000
Epoch 67/2000
Epoch 68/2000
Epoch 69/2000
Epoch 70/2000
Epoch 71/2000
Epoch 72/2000
E

In [None]:
model.evaluate(X_test_scaled, y_test)

In [31]:
X_test_scaled

Unnamed: 0,age,affordibility
21,0.26,0
15,0.55,1
17,0.58,1
1,0.25,0
7,0.6,0
23,0.45,1


In [None]:
model.predict(X_test_scaled)

In [None]:
coef, intercept = model.get_weights()
coef, intercept

In [None]:
def sigmoid(x):
    import math
    return 1 / (1 + math.exp(-x))

sigmoid(10)

In [None]:
def prediction_function(age, affordibility):
    weighted_sum = coef[0]*age + coef[1]*affordibility + intercept
    predicted_op = sigmoid(weighted_sum)
    return predicted_op

In [None]:
prediction_function(0.26, 0)

Now we start implementing gradient descent in plain python. Again the goal is to come up with same w1, w2 and bias that keras model calculated. We want to show how keras/tensorflow would have computed these values internally using gradient descent

In [32]:
def log_loss(y_true, y_predicted):
    epsilon = 1e-15
    y_predicted_new = [max(i,epsilon) for i in y_predicted]
    y_predicted_new = [min(i,1-epsilon) for i in y_predicted_new]
    y_predicted_new = np.array(y_predicted_new)
    return -np.mean(y_true*np.log(y_predicted_new)+(1-y_true)*np.log(1-y_predicted_new))

In [33]:
def sigmoid_numpy(X):
    return 1/(1 + np.exp(-X))

sigmoid_numpy(np.array([12, 0, 1]))

array([0.99999386, 0.5       , 0.73105858])

In [38]:
def gradient_descent(age, affordibility, y_true, epochs):
    # we try to come up with best values for w1, w2, bias
    w1 = w2 = 1
    bias = 0
    learning_rate = 0.5

    n = len(age)
    for i in range(epochs):
        weighted_sum = w1*age + w2*affordibility + bias
        y_predicted = sigmoid_numpy(weighted_sum)

        loss = log_loss(y_true, y_predicted)

        w1d = (1/n)*np.dot(np.transpose(age),(y_predicted - y_true)) 
        w2d = (1/n)*np.dot(np.transpose(affordibility),(y_predicted - y_true)) 
        bias_d = np.mean(y_predicted - y_true)

        w1 = w1 - learning_rate*w1d
        w2 = w2 - learning_rate*w2d
        bias = bias - learning_rate*bias_d

        print(f"Epoch:{i}, w1:{w1}, w2:{w2}, bias:{bias}, loss:{loss}")

    return w1, w2, bias

In [39]:
gradient_descent(X_train_scaled['age'], X_train_scaled['affordibility'], y_train, 1000)

Epoch:0, w1:0.9651327599336419, w2:0.9132156721803109, bias:-0.14382765768900405, loss:0.7891487079051143
Epoch:1, w1:0.9379769653459273, w2:0.8415507628236955, bias:-0.2673442691791505, loss:0.7346965627001811
Epoch:2, w1:0.9180103996131841, w2:0.7842056558610542, bias:-0.37204988367651864, loss:0.6959327796723406
Epoch:3, w1:0.9044306654489276, w2:0.7397032473723149, bias:-0.4601251873135651, loss:0.6692121826546181
Epoch:4, w1:0.8963147475937464, w2:0.7062407613989957, bias:-0.5340191281626178, loss:0.6511707645487668
Epoch:5, w1:0.8927450518182025, w2:0.6819764321688436, bias:-0.5961326858110472, loss:0.6390890228584193
Epoch:6, w1:0.8928850363924576, w2:0.6652066939348158, bias:-0.6486349932549182, loss:0.630959810904758
Epoch:7, w1:0.8960121730402525, w2:0.6544480342409057, bias:-0.6933877936671917, loss:0.625390760239743
Epoch:8, w1:0.9015234396503086, w2:0.6484563980715027, bias:-0.7319378967987338, loss:0.6214551868545969
Epoch:9, w1:0.9089264428772414, w2:0.646213210721557, b

(8.272642667168961, 1.7812733810202261, -4.784975671730553)