In [None]:
#Implementation of stochastic and batch gradient descent in Python

"""
We will use a very simple home prices data set to implement batch and stochastic gradient descent in Python. 
Batch gradient descent uses all training samples in the forward pass to calculate the cumulative error, and then 
we adjust the weights using derivatives. In stochastic GD, we randomly pick one training sample, perform a forward pass, 
compute the error and immediately adjust the weights. So the key difference is that, to adjust the weights, batch GD 
uses all training samples, whereas stochastic GD uses one randomly picked training sample.
"""

In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

In [None]:
# Load the home-price dataset from a CSV file (path is relative to the
# notebook's working directory) and preview 5 randomly chosen rows.
df = pd.read_csv("homeprices_banglore.csv")
df.sample(5)

In [None]:
from sklearn import preprocessing
# X is the feature set (area and bedrooms, i.e. every column except 'price');
# y is the target ('price'). Features and target get separate scalers so the
# price can later be inverse-transformed on its own with sy.
sx = preprocessing.MinMaxScaler()
sy = preprocessing.MinMaxScaler()

# scaled_X: the scaled features, obtained by dropping 'price' from df and
# fitting + transforming with sx.
scaled_X = sx.fit_transform(df.drop('price',axis='columns'))
# scaled_y: the scaled 'price' values. MinMaxScaler expects 2D input, so the
# price column is first reshaped to (number of rows in df, 1).
scaled_y = sy.fit_transform(df['price'].values.reshape(df.shape[0],1))

# Display the scaled feature values as the cell output.
scaled_X

In [None]:
# Inspect the scaled target values returned by sy.fit_transform.
scaled_y

In [None]:
# The target column (price) must be passed to the training functions as a
# one-dimensional array. It became 2D because of the scaling step above, so
# flatten it here. Passing -1 lets NumPy infer the length from the data
# instead of hard-coding the number of rows in the dataset.
scaled_y.reshape(-1)

In [None]:
# Batch Gradient Descent Implementation

def batch_gradient_descent(X, y_true, epochs, learning_rate = 0.01):
    """Fit a linear model ``y = w . x + b`` with full-batch gradient descent.

    Every epoch uses all rows of ``X`` to compute the gradient of the
    mean squared error and then performs one update of ``w`` and ``b``.

    Parameters
    ----------
    X : 2D array, one row per sample and one column per feature.
    y_true : 1D array with one target value per row of ``X``.
    epochs : number of update steps to perform.
    learning_rate : step size applied to the gradients.

    Returns
    -------
    w : 1D array of learned weights, one per feature.
    b : learned bias.
    cost : MSE computed during the last epoch, before that epoch's update
        was applied (``None`` if ``epochs`` is 0).
    cost_list : cost recorded at every 10th epoch (0, 10, 20, ...).
    epoch_list : the epoch numbers matching ``cost_list``.
    """
    # X has shape (total_samples, number_of_features); e.g. with the two
    # features area and bedrooms, number_of_features is 2.
    number_of_features = X.shape[1]
    # One weight per feature, all starting at 1.
    w = np.ones(shape=(number_of_features))
    b = 0
    total_samples = X.shape[0] # number of rows in X

    # Defined up front so the return statement is valid even when epochs == 0.
    cost = None
    # Cost values and their epoch numbers, kept for plotting.
    cost_list = []
    epoch_list = []

    for i in range(epochs):
        # Prediction for every sample: w1*x1 + w2*x2 + ... + b
        y_predicted = np.dot(w, X.T) + b
        # Residuals are needed by both gradients and the cost; compute once.
        error = y_true - y_predicted

        # Gradients of the MSE loss w.r.t. the weights and the bias.
        w_grad = -(2/total_samples)*(X.T.dot(error))
        b_grad = -(2/total_samples)*np.sum(error)

        # Step against the gradient, scaled by the learning rate.
        w = w - learning_rate * w_grad
        b = b - learning_rate * b_grad

        # MSE (Mean Squared Error) of the predictions made in this epoch.
        cost = np.mean(np.square(error))

        # Record every 10th epoch for plotting.
        if i%10==0:
            cost_list.append(cost)
            epoch_list.append(i)

    return w, b, cost, cost_list, epoch_list

# Train with batch gradient descent for 500 epochs. The scaled target is
# flattened to 1D (one value per sample) before being passed in.
w, b, cost, cost_list, epoch_list = batch_gradient_descent(scaled_X,scaled_y.reshape(scaled_y.shape[0],),500)
# Show the learned weights, the bias and the last computed cost.
w, b, cost

In [None]:
# Plot the recorded cost values against the epochs at which they were recorded.
plt.xlabel("epoch")
plt.ylabel("cost")
plt.plot(epoch_list,cost_list)

In [None]:
# A prediction

def predict(area,bedrooms,w,b):
    """Predict a home price in original (unscaled) units.

    The inputs are scaled with the module-level feature scaler ``sx``, the
    linear model ``w[0]*area + w[1]*bedrooms + b`` is evaluated in scaled
    space, and the result is mapped back with the target scaler ``sy``.

    NOTE(review): assumes ``sx`` was fitted on columns in (area, bedrooms)
    order and that ``w`` holds exactly two weights - confirm against the
    training data.
    """
    # transform() takes and returns a 2D array; keep the single row.
    features = sx.transform([[area, bedrooms]])[0]
    area_scaled, bedrooms_scaled = features[0], features[1]

    price_scaled = w[0] * area_scaled + w[1] * bedrooms_scaled + b

    # inverse_transform() also works on 2D data, so wrap the scalar and then
    # pull the single value back out with [0][0].
    price = sy.inverse_transform([[price_scaled]])
    return price[0][0]

# Predicted price for a home with area 2600 and 4 bedrooms.
predict(2600,4,w,b)

In [None]:
# Predicted price for a home with area 1000 and 2 bedrooms.
predict(1000,2,w,b)

In [None]:
# Predicted price for a home with area 1500 and 3 bedrooms.
predict(1500,3,w,b)

In [None]:
#Stochastic Gradient Descent Implementation

# We will use the random library to pick a random training sample.
import random
random.randint(0,6) # randint returns a random integer N with 0 <= N <= 6 (both endpoints are included)

def stochastic_gradient_descent(X, y_true, epochs, learning_rate = 0.01):
    """Fit a linear model ``y = w . x + b`` with stochastic gradient descent.

    Each iteration picks ONE training sample at random (using the ``random``
    module), computes the squared error for that sample and immediately
    updates ``w`` and ``b`` from its gradient.

    The gradient is that of the single-sample loss ``(y - y_hat)**2`` and is
    therefore not divided by the dataset size; dividing by ``total_samples``
    would only shrink the effective learning rate by that factor. This
    matches a mini-batch update with a batch of one sample.

    Parameters
    ----------
    X : 2D array, one row per sample and one column per feature.
    y_true : 1D array with one target value per row of ``X``.
    epochs : number of single-sample updates to perform.
    learning_rate : step size applied to the gradients.

    Returns
    -------
    w : 1D array of learned weights, one per feature.
    b : learned bias.
    cost : squared error of the last sampled point, measured before that
        iteration's update (``None`` if ``epochs`` is 0).
    cost_list : cost recorded at every 100th iteration (0, 100, 200, ...).
    epoch_list : the iteration numbers matching ``cost_list``.
    """
    number_of_features = X.shape[1]
    # One weight per feature, all starting at 1.
    w = np.ones(shape=(number_of_features))
    b = 0
    total_samples = X.shape[0]

    # Defined up front so the return statement is valid even when epochs == 0.
    cost = None
    cost_list = []
    epoch_list = []

    for i in range(epochs):
        # randint includes both endpoints, hence total_samples-1.
        random_index = random.randint(0,total_samples-1)
        sample_x = X[random_index]
        sample_y = y_true[random_index]

        y_predicted = np.dot(w, sample_x) + b
        error = sample_y - y_predicted

        # Gradient of (sample_y - y_predicted)**2 for this one sample.
        w_grad = -2 * error * sample_x
        b_grad = -2 * error

        w = w - learning_rate * w_grad
        b = b - learning_rate * b_grad

        cost = np.square(error)

        if i%100==0: # at every 100th iteration record the cost and epoch value
            cost_list.append(cost)
            epoch_list.append(i)

    return w, b, cost, cost_list, epoch_list

# Train with stochastic gradient descent for 10000 single-sample iterations.
# The scaled target is flattened to 1D before being passed in.
w_sgd, b_sgd, cost_sgd, cost_list_sgd, epoch_list_sgd = stochastic_gradient_descent(scaled_X,scaled_y.reshape(scaled_y.shape[0],),10000)
# Show the learned weights, the bias and the last computed cost.
w_sgd, b_sgd, cost_sgd

In [None]:
#Mini-Batch Gradient Descent Implementation
# Demo: np.random.permutation(20) returns the integers 0..19 in a random order.
# The mini-batch implementation uses the same call to shuffle sample indices.
np.random.permutation(20)

In [None]:
def mini_batch_gradient_descent(X, y_true, epochs = 100, batch_size = 5, learning_rate = 0.01):
    """Fit a linear model ``y = w . x + b`` with mini-batch gradient descent.

    Every epoch shuffles the samples with ``np.random.permutation`` (X and
    y_true are reordered with the same indices) and then walks through them
    in consecutive slices of ``batch_size`` rows, updating ``w`` and ``b``
    after each slice. Shuffling gives each epoch different mini-batches, so
    the model does not see the data in a fixed order.

    With 1000 samples and ``batch_size = 50`` each epoch performs 20 updates.
    When the sample count is not a multiple of ``batch_size`` the last slice
    of the epoch is simply smaller; gradients are averaged over the actual
    slice length.

    Parameters
    ----------
    X : 2D array, one row per sample and one column per feature.
    y_true : 1D array with one target value per row of ``X``.
    epochs : number of passes over the (shuffled) data.
    batch_size : rows per mini-batch; values larger than the number of
        samples are reduced to the number of samples.
    learning_rate : step size applied to the gradients.

    Returns
    -------
    w : 1D array of learned weights, one per feature.
    b : learned bias.
    cost : MSE of the last mini-batch processed, measured before that
        batch's update (``None`` if ``epochs`` is 0).
    cost_list : cost recorded at every 10th epoch (0, 10, 20, ...).
    epoch_list : the epoch numbers matching ``cost_list``.

    Raises
    ------
    ValueError : if ``batch_size`` is smaller than 1.
    """
    # A zero step would make range() fail and a negative one would silently
    # process no batches, so reject both with a clear message.
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    number_of_features = X.shape[1]
    # One weight per feature, all starting at 1.
    w = np.ones(shape=(number_of_features))
    b = 0
    total_samples = X.shape[0] # number of rows in X

    # If there are fewer samples than batch_size, use all samples per batch.
    if batch_size > total_samples:
        batch_size = total_samples

    # Defined up front so the return statement is valid even when epochs == 0.
    cost = None
    cost_list = []
    epoch_list = []

    for i in range(epochs):
        # Shuffle features and targets with the same random order.
        random_indices = np.random.permutation(total_samples)
        X_tmp = X[random_indices]
        y_tmp = y_true[random_indices]

        for j in range(0,total_samples,batch_size):
            Xj = X_tmp[j:j+batch_size]
            yj = y_tmp[j:j+batch_size]
            y_predicted = np.dot(w, Xj.T) + b
            error = yj - y_predicted

            # Average over the real slice length (the last one may be short).
            w_grad = -(2/len(Xj))*(Xj.T.dot(error))
            b_grad = -(2/len(Xj))*np.sum(error)

            w = w - learning_rate * w_grad
            b = b - learning_rate * b_grad

            cost = np.mean(np.square(error)) # MSE (Mean Squared Error)

        # Record the cost of the epoch's last mini-batch every 10th epoch.
        if i%10==0:
            cost_list.append(cost)
            epoch_list.append(i)

    return w, b, cost, cost_list, epoch_list

# Train with mini-batch gradient descent: 120 epochs, 5 samples per batch.
# The scaled target is flattened to 1D before being passed in. The call is
# wrapped in parentheses so the statement can span several lines (an
# assignment that ends at "=" is a syntax error).
w, b, cost, cost_list, epoch_list = mini_batch_gradient_descent(
    scaled_X,
    scaled_y.reshape(scaled_y.shape[0],),
    epochs = 120,
    batch_size = 5
)

# Show the learned weights, the bias and the last computed cost.
w, b, cost

In [None]:
# Plot the recorded cost values against the epochs at which they were recorded.
plt.xlabel("epoch")
plt.ylabel("cost")
plt.plot(epoch_list,cost_list)