# Problem 2:
Logistic regression with Newton's method

In [30]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

Logistic Regression - Sigmoid Function

In [31]:
def sigmoid(z):
    """Elementwise logistic function 1 / (1 + exp(-z)).

    The argument is clipped to [-500, 500] before exponentiation so that
    ``np.exp`` does not overflow for inputs of very large magnitude.
    """
    clipped = np.clip(z, -500, 500)
    exp_neg = np.exp(-clipped)
    return 1/(1+exp_neg)

Gradient (first derivative) of the mean log-loss: $\nabla_\theta L = \frac{1}{n} X^\top \big(\sigma(X\theta) - y\big)$

In [32]:
def gradient_lr(X, y, theta):
    """Gradient of the mean logistic loss at ``theta``.

    Args:
        X: design matrix; its first axis indexes samples.
        y: targets, subtracted elementwise from the predicted probabilities.
        theta: parameter vector multiplied as ``X @ theta``.

    Returns:
        (gradient, predictions): the averaged gradient
        ``(1/n) * X.T @ (predictions - y)`` and the sigmoid outputs it was
        computed from, so callers can reuse them.
    """
    predictions = sigmoid(X @ theta)
    residuals = predictions - y

    n_samples = X.shape[0]
    gradient = (1/n_samples) * X.T @ residuals

    return gradient, predictions

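Hessian matrix (second derivative): $H = \frac{1}{n} X^\top W X$, where $W = \mathrm{diag}\big(p_i (1 - p_i)\big)$ and $p = \sigma(X\theta)$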

In [33]:
def hessian_matrix(X, predictions):
    """Hessian of the mean logistic loss: ``(1/n) * X.T @ W @ X``.

    ``W`` is the diagonal matrix with entries ``p_i * (1 - p_i)``. It is never
    materialised: scaling the columns of ``X.T`` by the weight vector via
    broadcasting yields the same product while using O(n*d) memory instead of
    the O(n^2) that an explicit ``np.diag`` would need.

    Args:
        X: design matrix of shape (n, d).
        predictions: predicted probabilities, one per row of ``X``
            (assumed 1-D of length n, as ``np.diag`` on it previously implied).

    Returns:
        The (d, d) Hessian matrix.
    """
    n = X.shape[0]

    # Per-sample curvature p * (1 - p); largest where the model is least certain.
    uncertainty_weights = predictions * (1 - predictions)

    # (d, n) * (n,) broadcasts over rows, i.e. equals X.T @ diag(weights).
    X_transpose_W = X.T * uncertainty_weights

    hessian = (X_transpose_W @ X) / n

    return hessian

Newton's Update:

In [34]:
def newton_update(theta, gradient, hessian):
    """Take one Newton step: ``theta_new = theta - H^{-1} @ gradient``.

    A small ridge (``1e-8 * I``) is added to the Hessian so the linear system
    stays solvable when the Hessian is singular or nearly so. The step is
    obtained with ``np.linalg.solve`` rather than by forming the inverse
    explicitly, which is cheaper and numerically more accurate.

    Args:
        theta: current parameter vector.
        gradient: gradient at ``theta``.
        hessian: square Hessian matrix at ``theta``.

    Returns:
        (theta_new, delta): the updated parameters and the step that was
        subtracted from ``theta``.

    Raises:
        numpy.linalg.LinAlgError: if the regularised Hessian is still singular.
    """
    d = hessian.shape[0]

    hessian_regularization = hessian + np.eye(d) * 1e-8

    # Solve H @ delta = gradient directly instead of computing inv(H) @ gradient.
    delta = np.linalg.solve(hessian_regularization, gradient)

    theta_new = theta - delta

    return theta_new, delta

Newton's Method with Logistic Regression:

In [35]:
def newton_method_log(X, y, max_iter=15, tol=1e-6):
    """Fit logistic regression with Newton's method, starting from theta = 0.

    Every iteration evaluates the gradient and Hessian at the current
    parameters, applies one Newton step, and records the gradient norm and
    the mean log-loss. Both recorded quantities are computed from the
    predictions made *before* the step of that iteration.

    Args:
        X: design matrix of shape (n, d).
        y: targets, one per row of ``X``.
        max_iter: maximum number of Newton iterations.
        tol: iteration stops once the gradient norm drops below this value.

    Returns:
        (theta, loss_history, gradient_norms): the final parameters and two
        lists with one entry per executed iteration.
    """
    theta = np.zeros(X.shape[1])

    # Keeps log() finite when a predicted probability is exactly 0 or 1.
    epsilon = 1e-7

    loss_history = []
    gradient_norms = []

    for iteration in range(max_iter):
        gradient, predictions = gradient_lr(X, y, theta)

        hessian = hessian_matrix(X, predictions)

        theta, _ = newton_update(theta, gradient, hessian)

        gradient_norm = np.linalg.norm(gradient)
        # Store the scalar norm (the list itself was being appended before,
        # leaving the history full of self-references).
        gradient_norms.append(gradient_norm)

        loss = -np.mean(
            y*np.log(predictions + epsilon) +
            (1-y) * np.log(1-predictions+epsilon)
        )
        loss_history.append(loss)

        # iteration progress check:
        if iteration % 3 ==0:
            print(f"Iter {iteration:2d}: ||gradient|| = {gradient_norm:.6e}")

        if gradient_norm < tol:
            print(f"\n Converged at iteration {iteration}")
            break

    return theta, loss_history, gradient_norms

Spambase data preprocessing:

In [36]:
# Load Spambase: the last column is used as the target, the rest as features.
data = np.loadtxt("spambase/spambase.data", delimiter=",")
X = data[:, :-1]
y = data[:, -1]

# Split BEFORE scaling so the held-out rows never influence the scaler's
# mean/variance (fitting on the full dataset leaks test statistics into training).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply it to both splits.
scaler_spam = StandardScaler()
X_train = scaler_spam.fit_transform(X_train_raw)
X_test = scaler_spam.transform(X_test_raw)

# Full dataset on the training-set scale; kept so any cell that still refers
# to X_normalized continues to work.
X_normalized = scaler_spam.transform(X)

# Prepend a column of ones so the first coefficient acts as the intercept.
X_train_bias = np.c_[np.ones((X_train.shape[0], 1)), X_train]
X_test_bias = np.c_[np.ones((X_test.shape[0], 1)), X_test]

Now train logistic regression with Newton's method on Spambase:

In [37]:
# Fit the model on the bias-augmented training data.
theta_newton, loss_newton, grad_norms_newton = newton_method_log(
    X_train_bias, y_train, max_iter=20, tol=1e-6
)

# Predicted probabilities for each split.
train_prob_newton = sigmoid(X_train_bias @ theta_newton)
test_prob_newton = sigmoid(X_test_bias @ theta_newton)

# Threshold at 0.5 to obtain hard 0/1 labels.
train_pred_newton = (train_prob_newton >= 0.5).astype(int)
test_pred_newton = (test_prob_newton >= 0.5).astype(int)

# Accuracy = fraction of labels that match the targets.
train_accuracy = (train_pred_newton == y_train).mean()
test_accuracy = (test_pred_newton == y_test).mean()

# The iteration count is the number of recorded gradient norms, i.e. the
# number of iterations executed (whether or not the tolerance was reached).
print("\n".join([
    "\n" + "="*60,
    "NEWTON'S METHOD RESULTS:",
    f"Converged in: {len(grad_norms_newton)} iterations",
    f"Train Accuracy: {train_accuracy:.4f}",
    f"Test Accuracy: {test_accuracy:.4f}",
    "="*60,
]))

Iter  0: ||gradient|| = 6.798478e-01
Iter  3: ||gradient|| = 2.921431e-02
Iter  6: ||gradient|| = 2.729030e-03
Iter  9: ||gradient|| = 7.854493e-04
Iter 12: ||gradient|| = 1.321202e-05

 Converged at iteration 13

NEWTON'S METHOD RESULTS:
Converged in: 14 iterations
Train Accuracy: 0.9315
Test Accuracy: 0.9131
