In [1]:
import pandas as pd

df = pd.read_csv("../Data/advertising.csv")
print(df.head())

      TV  Radio  Newspaper  Sales
0  230.1   37.8       69.2   22.1
1   44.5   39.3       45.1   10.4
2   17.2   45.9       69.3   12.0
3  151.5   41.3       58.5   16.5
4  180.8   10.8       58.4   17.9


In [2]:
import numpy as np

x = df[['TV', 'Radio', 'Newspaper']].values
y = df['Sales'].values

x.shape, y.shape

((200, 3), (200,))

In [3]:
# Scale features
x = (x - x.mean(axis=0)) / x.std(axis=0)

# Scale target variable
y = (y - y.mean()) / y.std()

n_samples, n_features = x.shape
x_bias = np.c_[np.ones((n_samples, 1)), x]

weights = np.zeros(x_bias.shape[1])

In [4]:
epochs = 100  # Increasing epochs for better convergence
learning_rate = 0.01   # Increasing learning rate

Batch

In [5]:
def batch_gradient_descent(x, y, lr, epochs):
    weights = np.zeros(x.shape[1])
    for epoch in range(epochs):
        predictions = x @ weights
        error = predictions - y
        gradient = (2 / len(y)) * x.T @ error
        weights -= lr * gradient

        if (epoch + 1) % 10 == 0 or epoch == 0:
            mse = np.mean(error ** 2)
            print(f"[Batch] Epoch {epoch+1}: MSE = {mse:.4f}")
    return weights

Online

In [6]:
def stochastic_gradient_descent(x, y, lr, epochs):
    weights = np.zeros(x.shape[1])
    for epoch in range(epochs):
        for i in range(len(y)):
            xi = x[i]
            yi = y[i]
            prediction = np.dot(xi, weights)
            error = prediction - yi
            gradient = 2 * error * xi
            weights -= lr * gradient

        if (epoch + 1) % 10 == 0 or epoch == 0:
            predictions = x @ weights
            mse = np.mean((predictions - y) ** 2)
            print(f"[SGD] Epoch {epoch+1}: MSE = {mse:.4f}")
    return weights

Mini-batch

In [7]:
def mini_batch_gradient_descent(x, y, lr, epochs, batch_size=16,):
    weights = np.zeros(x.shape[1])
    n = len(y)

    for epoch in range(epochs):
        indices = np.arange(n)
        np.random.shuffle(indices)
        x_shuffled = x[indices]
        y_shuffled = y[indices]

        for start in range(0, n, batch_size):
            end = start + batch_size
            xb = x_shuffled[start:end]
            yb = y_shuffled[start:end]
            predictions = xb @ weights
            error = predictions - yb
            gradient = (2 / len(yb)) * xb.T @ error
            weights -= lr * gradient

        if (epoch + 1) % 10 == 0 or epoch == 0:
            mse = np.mean((x @ weights - y) ** 2)
            print(f"[Mini-Batch] Epoch {epoch+1}: MSE = {mse:.4f}")
    return weights

In [8]:
print("\n--- Running Batch Gradient Descent ---")
w_batch = batch_gradient_descent(x, y, learning_rate, epochs)
print("\nWeights from Batch Gradient Descent:", w_batch)

print("\n--- Running Stochastic Gradient Descent ---")
w_sgd = stochastic_gradient_descent(x, y, learning_rate, epochs)
print("\nWeights from Stochastic Gradient Descent:", w_sgd)

print("\n--- Running Mini-Batch Gradient Descent ---")
w_minibatch = mini_batch_gradient_descent(x, y,  learning_rate, epochs, batch_size=16)
print("\nWeights from Mini-Batch Gradient Descent:", w_minibatch)


--- Running Batch Gradient Descent ---
[Batch] Epoch 1: MSE = 1.0000
[Batch] Epoch 10: MSE = 0.7118
[Batch] Epoch 20: MSE = 0.5000
[Batch] Epoch 30: MSE = 0.3626
[Batch] Epoch 40: MSE = 0.2729
[Batch] Epoch 50: MSE = 0.2140
[Batch] Epoch 60: MSE = 0.1753
[Batch] Epoch 70: MSE = 0.1496
[Batch] Epoch 80: MSE = 0.1325
[Batch] Epoch 90: MSE = 0.1211
[Batch] Epoch 100: MSE = 0.1135

Weights from Batch Gradient Descent: [0.76928028 0.25927317 0.04218178]

--- Running Stochastic Gradient Descent ---
[SGD] Epoch 1: MSE = 0.0980
[SGD] Epoch 10: MSE = 0.0985
[SGD] Epoch 20: MSE = 0.0985
[SGD] Epoch 30: MSE = 0.0985
[SGD] Epoch 40: MSE = 0.0985
[SGD] Epoch 50: MSE = 0.0985
[SGD] Epoch 60: MSE = 0.0985
[SGD] Epoch 70: MSE = 0.0985
[SGD] Epoch 80: MSE = 0.0985
[SGD] Epoch 90: MSE = 0.0985
[SGD] Epoch 100: MSE = 0.0985

Weights from Stochastic Gradient Descent: [ 0.88709336  0.32373197 -0.03083284]

--- Running Mini-Batch Gradient Descent ---
[Mini-Batch] Epoch 1: MSE = 0.6127
[Mini-Batch] Epoch 10