In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [98]:
class LogisticRegression:
    """Binary logistic regression fitted with batch gradient descent.

    No intercept is added implicitly: include a constant column in the design
    matrix if one is wanted. ``params`` holds the coefficient vector (one entry
    per column) and stays ``None`` until supplied to the constructor or set by
    ``fit``.
    """

    def __init__(self, params=None):
        """Stores an optional coefficient vector (``fit`` overwrites it)."""
        self.params = params

    def sigmoid(self, x):
        """Applies the sigmoid function 1 / (1 + exp(-x)) element-wise.

        Evaluated as exp(-log(1 + exp(-x))) through ``np.logaddexp`` so that
        large negative inputs do not overflow ``exp``.
        """
        return np.exp(-np.logaddexp(0, -x))

    def negative_log_likelihood(self, X, Y):
        """Computes the negative log-likelihood of labels Y given features X.

        Uses the identity
            -[y*log(s) + (1 - y)*log(1 - s)] = log(1 + exp(z)) - y*z
        with z = X @ params and s = sigmoid(z). This avoids taking log(0),
        which produced nan/inf once the sigmoid saturated at 0 or 1.
        """
        logits = X @ self.params
        return np.sum(np.logaddexp(0, logits) - Y * logits)

    def default_learning_rate(self, X):
        """Computes the default learning rate from a scalar curvature estimate.

        Returns 1 / sum(s * (1 - s)) with s = sigmoid(X @ params), or 0.01 when
        that sum is exactly zero. X enters only through the logits, so with
        all-zero parameters (the state ``fit`` calls this in) the result is
        4 / n_samples.
        """
        logits = X @ self.params
        sigmoid_values = self.sigmoid(logits)
        hessian_approx = np.sum(sigmoid_values * (1 - sigmoid_values))
        return 1 / hessian_approx if hessian_approx != 0 else 0.01

    def gradient(self, X, Y):
        """Computes the gradient of the negative log-likelihood: X^T (sigmoid(X @ params) - Y)."""
        logits = X @ self.params
        errors = self.sigmoid(logits) - Y
        return X.T @ errors

    def fit(self, X_train, Y_train, learning_rate=None, max_iter=10000, verbose=False):
        """Fits the model using gradient descent with a fixed learning rate.

        Parameters
        ----------
        X_train : DataFrame or 2-D array-like, shape (n_samples, n_features)
        Y_train : Series or array-like of 0/1 labels, length n_samples
        learning_rate : float, optional
            Step size; when None, ``default_learning_rate`` is evaluated once
            at the zero-initialised parameters.
        max_iter : int
            Number of gradient steps (there is no convergence check).
        verbose : bool
            When True, prints the loss every 100 iterations.

        The fitted coefficients are stored in ``self.params``.
        """
        # np.asarray accepts pandas objects as well as plain arrays/lists.
        X = np.asarray(X_train, dtype=float)
        Y = np.asarray(Y_train, dtype=float).ravel()
        n_features = X.shape[1]
        self.params = np.zeros(n_features)  # Initialize parameters

        if learning_rate is None:
            learning_rate = self.default_learning_rate(X)

        for iteration in range(max_iter):
            grad = self.gradient(X, Y)
            self.params -= learning_rate * grad  # Gradient descent update

            # Optionally print the loss every 100 iterations
            if verbose and iteration % 100 == 0:
                loss = self.negative_log_likelihood(X, Y)
                print(f"Iteration {iteration}: Loss = {loss:.4f}")

    def predict_probs(self, X_train):
        """Predicts the probability of label 1 for each row of the input data."""
        return self.sigmoid(X_train @ self.params)

    def predict(self, X_train, threshold=0.5):
        """Predicts class labels (1 where probability >= threshold, else 0)."""
        return (self.predict_probs(X_train) >= threshold).astype(int)

In [100]:
def test(logistic_regression_model, n_samples=1000, N_columns=2, true_params=None, 
         learning_rate=None, max_iter=1000, random_seed=None):
    """Function testing the accuracy of the logistic regression class"""
    random_state = np.random.default_rng(random_seed)
    X_train = pd.DataFrame(random_state.random((n_samples, N_columns)))
    X_train["constant"] = 1
    if not true_params:
        true_params = random_state.random(N_columns+1)
    Y_train = pd.Series((random_state.random(n_samples) < 1 / (1 + np.exp(-(X_train @ true_params)))).astype(int))

    # Fit the logistic regression model
    model = logistic_regression_model()
    model.fit(X_train, Y_train, learning_rate=learning_rate, max_iter=max_iter)

    print(f"Correct parameters: {true_params}, Fitted parameters: {model.params}")

    predictions = model.predict(X_train)
    print(f"Model's accuracy:{np.mean(predictions == Y_train)}")

In [101]:
# Seeded end-to-end check: 1000 synthetic samples, 2 features plus a constant column.
test(
    LogisticRegression,
    n_samples=1000,
    N_columns=2,
    max_iter=10000,
    random_seed=1,
)

Correct parameters: [0.28417258 0.00980147 0.11038067], Fitted parameters: [0.54393037 0.20567226 0.02519313]
Model's accuracy:0.598
