In [4]:
import numpy as np

def elastic_net_gradient_descent(
    X: np.ndarray,
    y: np.ndarray,
    alpha1: float = 0.1,
    alpha2: float = 0.1,
    learning_rate: float = 0.01,
    max_iter: int = 1000,
    tol: float = 1e-4,
) -> tuple:
    
    m, n = X.shape
    weights = np.zeros(n)
    bias = 0

    def y_hat(X, weights, bias):
        return X.dot(weights) + bias

    for _ in range(max_iter):

        preds = y_hat(X, weights, bias)
        grad_w = (1 / m) * X.T.dot(preds - y) + alpha1 * np.sign(weights) + alpha2 * 2 * weights
        grad_b = (1 / m) * np.sum(preds - y)

        if np.linalg.norm(grad_w) < tol or abs(grad_b) < tol:
            break

        weights -= learning_rate * grad_w
        bias -= learning_rate * grad_b

    return weights, bias



In [5]:
X = np.array([[0, 0], [1, 1], [2, 2]])
y = np.array([0, 1, 2])

elastic_net_gradient_descent(X, y)

(array([0.37323965, 0.37323965]), 0.24784989555671158)