In [6]:
%matplotlib inline

import scipy 
import numpy as np 
import matplotlib.pyplot as plt 
import pandas as pd 
import copy

#importing required modules, scipy to do the 

In [56]:
data = pd.read_csv("DataSets/diabetes-dataset.csv")
#data has been imported 
#creation of train test and outcome datasets
x_train=copy.copy(data)
x_train.drop(['Outcome'],axis=1,inplace=True)
y_train=copy.copy(data)
y_train.drop(['Pregnancies','Glucose','BloodPressure','SkinThickness','Insulin','BMI','DiabetesPedigreeFunction','Age'],axis=1,inplace=True)
y_train=y_train.to_numpy()
x_train=x_train.to_numpy()
# y_train=y_train
# x_train=x_train

In [46]:
#standardising the train dataset
x_train=x_train/ x_train.std()
x_train.shape

(8, 2000)

In [9]:
#The sigmoid function
def sigmoid(z):
    return 1/(1+np.exp(-z))

In [39]:
#initializer for w and b

def init(dim):
    w=np.zeros((dim,1))
    b=0.0
    return w,b

In [54]:
#forward and backward propagation

def propagate(w,b,X,Y):
    #get number of columns
    m=X.shape[0]

    #computing activation function
    A=sigmoid(np.dot(w.T,X)+b)
    #computing cost function

    cost=- (1/m)*np.sum((Y*np.log(A))+(1-Y)*(1-np.log(A)))

    #backward propagation:
    dw=(np.dot(X,(A-Y).T))/m
    db=np.sum(A-Y)/m 

    cost=np.squeeze(np.array(cost))

    grads = {
        "dw": dw,
        "db": db
    }
    return grads,cost

In [32]:
#updating the parameters using gradient descent after each epoch

def optimize(w,b,X,Y,num_iter,learn_rate,print_cost=False):
    costs=[]
    for i in range(num_iter): #number of epochs
        grads,cost=propagate(w,b,X,Y)

        dw=grads["dw"]
        db=grads["db"]

        w=w-(learn_rate*dw)
        b=b-(learn_rate*db)

        if i%100==0:
            costs.append(cost) #logging the cost
            if print_cost: 
                print("Cost after",i," iterations is =",cost)
        
    params={
        "w":w,
        "b":b
    }

    grads={
        "dw":dw,
        "db":db
    }

    return params,grads,costs

In [61]:
#using the created algorithm to predict new algorithms 

def predict(w,b,X):
    m=X.shape[1]
    Y_prediction=np.zeros((1,m))
    #w=w.reshape(X.shape[0],1)

    #forward pass
    A=sigmoid(np.dot(w.T,X)+b)
    for i in range(A.shape[1]):
        Y_prediction[0, i] = 1 if A[0,i] >=0.5 else 0
    
    return Y_prediction

In [59]:
#a wrapper class

def model(X_train,Y_train,X_test,Y_test,num_iter=1000,learn_rate=0.5,print_cost=True):
    w,b=init(X_train.shape[0])
    params,grads,costs=optimize(w,b,X_train,Y_train,num_iter,learn_rate,print_cost)
    w=params["w"]
    b=params["b"]
    Y_prediction_test=predict(w,b,X_test)
    Y_prediction_train=predict(w,b,X_train)

    print("Train accuracy: {} %".format(100-np.mean(np.abs(Y_prediction_train-Y_train))*100))
    print("Test accuracy: {} %".format(100-np.mean(np.abs(Y_prediction_test-Y_test))*100))

    d = {"costs": costs,
        "Y_prediction_test": Y_prediction_test, 
        "Y_prediction_train" : Y_prediction_train, 
        "w" : w, 
        "b" : b,
        "learning_rate" : learn_rate,
        "num_iterations": num_iter}
    
    return d



In [62]:
log_reg_mod=model(x_train,y_train,x_train,y_train,num_iter=2000,learn_rate=0.0005,print_cost=True)

Cost after 0  iterations is = -7.016276072455542
Cost after 100  iterations is = -60.443771263026264
Cost after 200  iterations is = -75.1245986053976
Cost after 300  iterations is = -86.48767708377011
Cost after 400  iterations is = -95.99064233814686
Cost after 500  iterations is = -104.23935324199883
Cost after 600  iterations is = -111.57294137851503
Cost after 700  iterations is = -118.20672025686561
Cost after 800  iterations is = -124.28736956792822
Cost after 900  iterations is = -129.91963074033293
Cost after 1000  iterations is = -135.18095909684982
Cost after 1100  iterations is = -140.13023603797205
Cost after 1200  iterations is = -144.8132492397551
Cost after 1300  iterations is = -149.26629023595976
Cost after 1400  iterations is = -153.51859814689044
Cost after 1500  iterations is = -157.59406726977173
Cost after 1600  iterations is = -161.51246931806975
Cost after 1700  iterations is = -165.29034665160222
Cost after 1800  iterations is = -168.94167708120702
Cost after 