In [121]:
import numpy as np

In [122]:
def f_wb(X, w, b):
    """
    Evaluate the linear model: dot product of X with w, plus b

    Parameters
    ----------
    X: (ndarray (m, n))
        feature matrix; m is the number of samples, n is the number of features
    w: (ndarray (n,))
        weights, one per feature
    b: (scalar)
        bias added to every prediction

    Returns
    -------
    (ndarray (m,))
        one prediction per sample
    """

    weighted_sum = np.dot(X, w)
    return weighted_sum + b

In [123]:
def compute_cost(X, y, w, b):
    """
    Compute the cost: sum of squared prediction errors scaled by 1 / (2 * m)

    Parameters
    ----------
    X: (ndarray (m, n))
        feature matrix; m is the number of samples, n is the number of features
    y: (ndarray (m,))
        target values
    w: (ndarray (n,))
        weights
    b: (scalar)
        bias

    Returns
    -------
    (scalar)
        cost of the model (w, b) on (X, y)
    """

    n_samples = X.shape[0]
    residuals = f_wb(X, w, b) - y
    scale = 1 / (2 * n_samples)
    return scale * np.sum(residuals ** 2)

In [124]:
def compute_gradient(X, y, w, b):
    """
    Compute the gradient of the cost with respect to w and b

    Parameters
    ----------
    X: (ndarray (m, n))
        feature matrix; m is the number of samples, n is the number of features
    y: (ndarray (m,))
        target values
    w: (ndarray (n,))
        weights
    b: (scalar)
        bias

    Returns
    -------
    dj_dw: (ndarray (n,))
        1/m times the dot product of X transposed with the prediction errors
    dj_db: (scalar)
        1/m times the sum of the prediction errors
    """

    n_samples = X.shape[0]
    # Prediction error per sample; shared by both partial derivatives.
    residuals = f_wb(X, w, b) - y
    inv_m = 1 / n_samples
    grad_w = inv_m * np.dot(X.T, residuals)
    grad_b = inv_m * np.sum(residuals)
    return grad_w, grad_b

In [125]:
def perform_gradient_descent(X, y, w_init, b_init, alpha, epochs, verbose=True):
    """
    Perform gradient descent

    The initial parameters passed in are left untouched: the updates are
    applied to a float copy of `w_init`, and `b` is rebound rather than
    updated in place.

    Parameters
    ----------
    X: (ndarray (m, n))
        m is the number of samples, n is the number of features
    y: (ndarray (m,))
    w_init: (ndarray (n,))
        starting weights; not modified
    b_init: (scalar)
        starting bias
    alpha: (scalar)
        step size applied to each gradient
    epochs: (int)
        number of update steps to run
    verbose: (bool)
        when True (default) print the cost after every epoch; when False
        the per-epoch cost is neither computed nor printed

    Returns
    -------
    w: (ndarray (n,))
        float array, a new object distinct from `w_init`
    b: (scalar)
    """

    # Work on a private float copy. Binding `w = w_init` directly would make
    # the in-place `w -= ...` below overwrite the caller's array, and would
    # raise a casting error for an integer-dtype `w_init`.
    w = np.array(w_init, dtype=float)
    b = b_init
    for i in range(epochs):
        dj_dw, dj_db = compute_gradient(X, y, w, b)
        w -= alpha * dj_dw
        # Rebind instead of `-=` so an array-like `b_init` is not mutated either.
        b = b - alpha * dj_db
        if verbose:
            print(f'Epoch {i + 1}, Cost: {compute_cost(X, y, w, b)}')
    return w, b

In [126]:
# Training set: 3 samples with 4 features each, and one target per sample.
X_train = np.array([[2104, 5, 1, 45], [1416, 3, 2, 40], [852, 2, 1, 35]])
y_train = np.array([460, 232, 178])

# Start from all-zero parameters and run 1000 epochs with a step size of 1e-7.
w, b = perform_gradient_descent(
    X_train,
    y_train,
    w_init=np.zeros(X_train.shape[1]),
    b_init=0.,
    alpha=1e-7,
    epochs=1000,
)
(w, b)

Epoch 1, Cost: 28989.111501449268
Epoch 2, Cost: 17092.48916583246
Epoch 3, Cost: 10198.319962562276
Epoch 4, Cost: 6203.104159134617
Epoch 5, Cost: 3887.8502848154976
Epoch 6, Cost: 2546.145030983679
Epoch 7, Cost: 1768.6173933475704
Epoch 8, Cost: 1318.0342718091906
Epoch 9, Cost: 1056.9175763423789
Epoch 10, Cost: 905.59787958039
Epoch 11, Cost: 817.9062417617257
Epoch 12, Cost: 767.0874687152398
Epoch 13, Cost: 737.6367552876493
Epoch 14, Cost: 720.5689683005712
Epoch 15, Cost: 710.6771659424062
Epoch 16, Cost: 704.9438923234015
Epoch 17, Cost: 701.6205125680231
Epoch 18, Cost: 699.6936816466668
Epoch 19, Cost: 698.5761596710154
Epoch 20, Cost: 697.9276373916437
Epoch 21, Cost: 697.5509034345446
Epoch 22, Cost: 697.3316725544544
Epoch 23, Cost: 697.2037157250268
Epoch 24, Cost: 697.1286528043527
Epoch 25, Cost: 697.0842422561459
Epoch 26, Cost: 697.057594968697
Epoch 27, Cost: 697.0412416198142
Epoch 28, Cost: 697.0308536897855
Epoch 29, Cost: 697.0239227739158
Epoch 30, Cost: 697.

(array([ 0.20253263,  0.00112386, -0.00213202, -0.00933401]),
 np.float64(-0.0003572563919114557))

In [127]:
# Predict on the training inputs with the current w and b, then display the
# targets next to the predictions for a side-by-side comparison.
y_hat = f_wb(X_train, w, b)
(y_train, y_hat)

(array([460, 232, 178]), array([425.71175692, 286.41159657, 172.23087045]))

In [128]:
# Cost on the training set for the current w and b.
cost = compute_cost(X_train, y_train, w, b)
cost

np.float64(694.9313850932496)