In [3]:
import numpy as np
import matplotlib.pyplot as plt


In [8]:
def compute_cost_linear_reg(X, y, w, b, lambda_ = 1):
    m, n = X.shape
    total_cost = 0
    for i in range(m):
        f_wb_i = np.dot(w, X[i]) + b
        total_cost += (f_wb_i - y[i])**2
    total_cost /= 2*m
    reg_cost = 0
    for j in range(n):
        reg_cost += w[j] ** 2
    reg_cost = lambda_/(2*m) * (reg_cost)
    res = reg_cost + total_cost
    return res

#As we all have learned that the cost of the linear function solely based on the mean squared error, so what does reg does really do here? What is it importance?
# First off, regularization is borned to minimizing the w at balance scale so that it can add up a penalty term for the cost function.
# So let's say that we already had MSE stood but we had large x in polominial scale. Therefore, we need to have to do something in order to add up on another penalty term that is equally to reduce overfitting cases.
# By adding up another penalty term, our cost function will be increased. Still, it would be slightly. And, unimportant. Because, gradient will help us with it.



In [7]:
np.random.seed(1)
X_tmp = np.random.rand(5,6)
y_tmp = np.array([0,1,0,1,0])
w_tmp = np.random.rand(X_tmp.shape[1]).reshape(-1,)-0.5
b_tmp = 0.5
lambda_tmp = 0.7
cost_tmp = compute_cost_linear_reg(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp)

print("Regularized cost:", cost_tmp)

Regularized cost: 0.07917239320214277


In [28]:
def sigmoid(z):
    return 1/(1+np.exp(z))

def compute_cost_logistic_reg(X, y, w, b, lambda_ = 1):
    m, n = X.shape
    cost = 0
    # h = sigmoid((np.dot(X, w) + b))
    for i in range(m):
        z_i = np.dot(X[i], w) + b
        f_wb_i = sigmoid(z_i)
        cost +=  -y[i]*np.log(f_wb_i) - (1-y[i])*np.log(1-f_wb_i)
    cost /= m
    reg_cost = 0        
    for j in range(n):
        reg_cost += (w[j] ** 2)
    reg_cost = (lambda_/(2*m)) * reg_cost
    total_cost = reg_cost + cost
    return total_cost 

In [None]:
np.random.seed(1)
X_tmp = np.random.rand(5,6)
y_tmp = np.array([0,1,0,1,0])
w_tmp = np.random.rand(X_tmp.shape[1]).reshape(-1,)-0.5
b_tmp = 0.5
lambda_tmp = 0.7
cost_tmp = compute_cost_logistic_reg(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp)

print("Regularized cost:", cost_tmp)

# As we all know, that the cost_function of logistic regression only penalizes for which that they predicted it is wrong. For instance
# If my y[i] is false, and my predicted f_wb_i equals right. Therefore, it has to be penalized within the left formula and see what it is wrong
# Regularization works the same as above.

Regularized cost: 0.818117348296815


In [None]:
def compute_gradient(X, y, w, b, lambda_ = 1):
    m, n = X.shape
    dj_dw = np.zeros(n)
    dj_db = 0
    for i in range (m):
        err = sigmoid((np.dot(X[i], w) + b)) - y[i]
        for j in range(n):
            dj_dw[j] += err * X[i, j]
        dj_db += err
    dj_dw /= m
    dj_db /= m
    for j in range(n):
        dj_dw[j] = dj_dw[j] + (lambda_/m) * w[j]
    return dj_db, dj_dw

#dj_dw, dj_db acts as a slope for all training examples. Directing them towards the minimum local point.
#Due to regularization inside the cost function, therefore, it is necessary to add up in compute gradient. 
#But, once we step in gradient descent, w[j] gets updated everytime correct? Therefore, w[j] is absoultely neccessary to be reduced, and regularization will help with it. It gets pull back towards 0.



dj_db: 0.20376447676428128
Regularized dj_dw:
 [0.29653214748822276, 0.4911679625918033, 0.21645877535865857]


In [40]:
def gradient_descent(X, y, w_ini, b_ini, lambda_, alpha, num_iter):
    w = w_ini
    b = b_ini
    m, n = X.shape
    for i in range(num_iter):
        dj_dw, dj_db = compute_gradient(X, y, w, b)
        w = w - alpha * dj_dw
        b = b- alpha * dj_db
    return w, b
