In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, log_loss
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
import statsmodels.api as sm
from scipy.optimize import minimize
import logging

In [2]:
# Generate a synthetic dataset
X, y = make_classification(n_samples=1000, n_features=10, random_state=42)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [3]:
# Logistic Regression with scikit-learn
sklearn_model = LogisticRegression(solver='lbfgs')
sklearn_model.fit(X_train, y_train)

y_pred_sklearn = sklearn_model.predict(X_test)
y_pred_proba_sklearn = sklearn_model.predict_proba(X_test)[:, 1]

accuracy_sklearn = accuracy_score(y_test, y_pred_sklearn)
log_loss_sklearn = log_loss(y_test, y_pred_proba_sklearn)
print(f"Accuracy (scikit-learn): {accuracy_sklearn:.4f}")
print(f"Log Loss (scikit-learn): {log_loss_sklearn:.4f}")

Accuracy (scikit-learn): 0.8467
Log Loss (scikit-learn): 0.3566


In [4]:
# Logistic Regression with statsmodels
X_train_sm = sm.add_constant(X_train)  # Adding intercept term
X_test_sm = sm.add_constant(X_test)

statsmodels_model = sm.Logit(y_train, X_train_sm).fit()

y_pred_proba_sm = statsmodels_model.predict(X_test_sm)
y_pred_sm = (y_pred_proba_sm >= 0.5).astype(int)

accuracy_sm = accuracy_score(y_test, y_pred_sm)
log_loss_sm = log_loss(y_test, y_pred_proba_sm)
print(f"\nAccuracy (statsmodels): {accuracy_sm:.4f}")
print(f"Log Loss (statsmodels): {log_loss_sm:.4f}")

Optimization terminated successfully.
         Current function value: 0.322850
         Iterations 8

Accuracy (statsmodels): 0.8400
Log Loss (statsmodels): 0.3576


In [5]:
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class LogisticRegressionLagrange:
    """
    Logistic Regression using Maximum Likelihood Estimation (MLE) with Lagrange multipliers.

    Attributes:
    -----------
    X : np.ndarray
        The feature matrix.
    y : np.ndarray
        The target vector.
    beta : np.ndarray
        The estimated coefficients after fitting the model.
    lambdas : np.ndarray
        The Lagrange multipliers for the constraints.
    constraints : list of callable
        A list of constraint functions that return a scalar value to enforce a condition.

    Methods:
    --------
    fit(solver='BFGS', regularization=None, alpha=1.0):
        Fits the logistic regression model with optional regularization and constraints.
    predict_proba(X_new):
        Predicts probabilities for new data.
    predict(X_new):
        Predicts binary or multi-class class labels for new data.
    evaluate(X_test, y_test):
        Evaluates the model on test data, returning accuracy and log loss.
    """

    def __init__(self, X, y, constraints=[]):
        self.X = X
        self.y = y
        self.beta = None
        self.lambdas = None  # Lagrange multipliers
        self.constraints = constraints  # List of constraint functions

    def sigmoid(self, z):
        """Computes the sigmoid function for binary classification."""
        return 1 / (1 + np.exp(-z))

    def softmax(self, z):
        """Computes the softmax function for multi-class classification."""
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / exp_z.sum(axis=1, keepdims=True)

    def log_likelihood(self, beta):
        """Computes the negative log-likelihood for binary classification."""
        z = np.dot(self.X, beta)
        return -np.sum(self.y * z - np.log(1 + np.exp(z)))

    def log_likelihood_multi(self, beta):
        """Computes the negative log-likelihood for multi-class classification."""
        z = np.dot(self.X, beta)
        prob = self.softmax(z)
        return -np.sum(self.y * np.log(prob))

    def regularization(self, beta, penalty='L2', alpha=1.0):
        """Adds regularization terms to the objective function."""
        if penalty == 'L2':
            return alpha * np.sum(beta**2)  # L2 regularization
        elif penalty == 'L1':
            return alpha * np.sum(np.abs(beta))  # L1 regularization
        elif penalty == 'ElasticNet':
            return alpha * (0.5 * np.sum(beta**2) + 0.5 * np.sum(np.abs(beta)))  # Elastic Net
        return 0  # No regularization

    def constraint_function(self, beta):
        """Applies the constraints and returns the sum of all constraint violations."""
        if not self.constraints:
            return 0  # No constraints
        else:
            return sum(constraint(beta) for constraint in self.constraints)

    def lagrangian(self, params, regularization=None, alpha=1.0):
        """Defines the Lagrangian function."""
        beta = params[:self.X.shape[1]]  # The coefficients
        lambdas = params[self.X.shape[1]:]  # The Lagrange multipliers

        # Compute the log-likelihood (binary or multi-class)
        if self.y.ndim == 1:  # Binary classification
            log_likelihood_val = self.log_likelihood(beta)
        else:  # Multi-class classification
            log_likelihood_val = self.log_likelihood_multi(beta)

        # Apply regularization
        reg_val = self.regularization(beta, penalty=regularization, alpha=alpha)

        # Compute constraint violations
        constraint_val = sum(lambdas[i] * self.constraints[i](beta) for i in range(len(self.constraints)))

        # Return Lagrangian
        return log_likelihood_val + reg_val + constraint_val

    def gradient(self, params, regularization=None, alpha=1.0):
        """Computes the gradient of the Lagrangian."""
        beta = params[:self.X.shape[1]]
        lambdas = params[self.X.shape[1]:]

        z = np.dot(self.X, beta)
        if self.y.ndim == 1:  # Binary classification
            prob = self.sigmoid(z)
            grad_log_likelihood = np.dot(self.X.T, prob - self.y)
        else:  # Multi-class classification
            prob = self.softmax(z)
            grad_log_likelihood = np.dot(self.X.T, (prob - self.y))

        # Gradient of the constraint
        grad_constraint = np.sum([self.constraints[i](beta) for i in range(len(self.constraints))])

        # Gradient of regularization
        if regularization == 'L2':
            grad_reg = alpha * 2 * beta
        elif regularization == 'L1':
            grad_reg = alpha * np.sign(beta)
        else:
            grad_reg = 0

        # Combine gradients
        grad_lagrangian = grad_log_likelihood + grad_reg + grad_constraint
        return np.concatenate([grad_lagrangian, np.zeros(len(lambdas))])  # No gradient for lambdas

    def fit(self, solver='BFGS', regularization=None, alpha=1.0):
        """
        Fits the logistic regression model using the Lagrangian approach.

        Parameters:
        -----------
        solver : str, optional
            The optimization solver to use (default is 'BFGS').
        regularization : str, optional
            The type of regularization to apply ('L1', 'L2', or 'ElasticNet').
        alpha : float, optional
            The regularization strength (default is 1.0).
        """
        initial_params = np.zeros(self.X.shape[1] + len(self.constraints))  # Initialize beta and lambdas

        # Minimize the Lagrangian
        result = minimize(self.lagrangian, initial_params, method=solver, jac=self.gradient,
                          args=(regularization, alpha))
        self.beta = result.x[:self.X.shape[1]]  # Extract coefficients
        self.lambdas = result.x[self.X.shape[1]:]  # Extract Lagrange multipliers

        logger.info(f"Optimization completed with beta: {self.beta}, lambdas: {self.lambdas}")
        return self.beta

    def predict_proba(self, X_new):
        """Predicts probabilities for new data."""
        z = np.dot(X_new, self.beta)
        if self.y.ndim == 1:  # Binary classification
            return self.sigmoid(z)
        else:  # Multi-class classification
            return self.softmax(z)

    def predict(self, X_new):
        """Predicts binary or multi-class class labels for new data."""
        if self.y.ndim == 1:  # Binary classification
            return (self.predict_proba(X_new) >= 0.5).astype(int)
        else:  # Multi-class classification
            return np.argmax(self.predict_proba(X_new), axis=1)

    def evaluate(self, X_test, y_test):
        """Evaluates the model on test data, returning accuracy and log loss."""
        y_pred_proba = self.predict_proba(X_test)
        y_pred = self.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        log_loss_val = log_loss(y_test, y_pred_proba)
        logger.info(f"Evaluation completed: Accuracy = {accuracy:.4f}, Log Loss = {log_loss_val:.4f}")
        return accuracy, log_loss_val

In [6]:
# Constraint: Sum of the coefficients (excluding bias) must be <= 1
def constraint(beta):
    return np.sum(beta[:-1]) - 1  # Sum of all coefficients <= 1

# Fit the model
mle_model_lagrange = LogisticRegressionLagrange(X_train, y_train, constraints=[constraint])
mle_model_lagrange.fit(solver='BFGS', regularization='L2', alpha=0.1)

# Evaluate the model
accuracy_lagrange, log_loss_lagrange = mle_model_lagrange.evaluate(X_test, y_test)
print(f"Accuracy (Lagrange): {accuracy_lagrange:.4f}")
print(f"Log Loss (Lagrange): {log_loss_lagrange:.4f}")

Accuracy (Lagrange): 0.8400
Log Loss (Lagrange): 0.3554
