In [29]:
import numpy as np

def l1_regularization_gradient_descent(X: np.ndarray, y: np.ndarray, alpha: float = 0.1, learning_rate: float = 0.01, max_iter: int = 1000, tol: float = 1e-4) -> tuple:
    """Fit a linear model with an L1 (Lasso) penalty by batch subgradient descent.

    Each iteration takes one step on the objective
    ``(1 / (2 * m)) * ||X @ w + b - y||**2 + alpha * ||w||_1``,
    using ``sign(w)`` as the subgradient of the L1 term. The bias is not
    penalised.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Feature matrix; coerced to a float array.
    y : array-like with m elements
        Targets. Flattened to 1-D, so both ``(m,)`` and ``(m, 1)`` layouts
        are accepted.
    alpha : float
        Strength of the L1 penalty on the weights.
    learning_rate : float
        Step size applied to both the weight and bias gradients.
    max_iter : int
        Upper bound on the number of gradient steps.
    tol : float
        Iteration stops early once the weight-gradient norm and the absolute
        bias gradient are both below this value.

    Returns
    -------
    tuple
        ``(weights, bias)``: a float array of shape ``(n,)`` and a float.
    """
    # Coerce up front so plain lists and integer arrays are accepted.
    X = np.asarray(X, dtype=float)
    # Flatten the targets: an (m, 1) column would otherwise broadcast against
    # the (m,) predictions into an (m, m) residual matrix.
    y = np.asarray(y, dtype=float).ravel()

    m, n = X.shape

    weights = np.zeros(n)
    bias = 0.0  # float so the return type is the same even with zero updates

    for _ in range(max_iter):
        residual = X.dot(weights) + bias - y

        # np.sign(0) == 0, so weights that are exactly zero receive no
        # penalty contribution on this step.
        grad_w = (1 / m) * X.T.dot(residual) + alpha * np.sign(weights)
        grad_b = (1 / m) * np.sum(residual)

        # Check before updating: the returned parameters are the ones whose
        # gradient satisfied the tolerance.
        if np.linalg.norm(grad_w) < tol and abs(grad_b) < tol:
            break

        weights -= learning_rate * grad_w
        bias -= learning_rate * grad_b

    return weights, bias

In [30]:
# Toy dataset: the two feature columns are identical and the target equals
# their shared value.
X = np.array(
    [
        [0, 0],
        [1, 1],
        [2, 2],
    ]
)
y = np.array([0, 1, 2])

# L1 penalty strength for the fit below.
alpha = 0.1

weights, bias = l1_regularization_gradient_descent(
    X,
    y,
    alpha=alpha,
    learning_rate=0.01,
    max_iter=1000,
)

In [31]:
# Bare last expression: the notebook renders the fitted (weights, bias) pair as this cell's output.
weights, bias

(array([0.42371644, 0.42371644]), 0.15385068459377865)