# IRLS Class

In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy.stats import norm
from scipy.special import gammaln, digamma, polygamma

class IRLSRegression:
    def __init__(self, family='linear', link='identity', inverse_link=None,
                 regularization='none', reg_lambda=1.0, reg_alpha=0.5,
                 l1_method='coordinate_descent', tol=1e-6, max_iter=100,
                 max_cd_iter=100, cd_tol=1e-4, verbose=False,
                 scale=False, scaling_method='standardization',
                 encode_categorical=False, categorical_features=None,
                 categorical_encoding=None, ordinal_mapping=None):

        self.family = family.lower()
        self.tol = tol
        self.max_iter = max_iter
        self.verbose = verbose
        self.beta_ = None

        # Initialize alpha if needed
        if self.family in ['negative_binomial', 'gamma']:
            self.alpha_ = 1.0
        else:
            self.alpha_ = None

        # Regularization checks
        self.regularization = regularization.lower()
        if self.regularization not in ['none','l1','l2','elasticnet']:
            raise ValueError("Invalid regularization.")
        self.reg_lambda = reg_lambda
        self.reg_alpha = reg_alpha

        self.l1_method = l1_method.lower()
        if self.l1_method not in ['soft_thresholding','coordinate_descent']:
            raise ValueError("l1_method must be 'soft_thresholding' or 'coordinate_descent'")

        self.max_cd_iter = max_cd_iter
        self.cd_tol = cd_tol

        self.scale = scale
        self.scaling_method = scaling_method.lower()
        if self.scaling_method not in ['standardization','minmax']:
            raise ValueError("scaling_method must be 'standardization' or 'minmax'")

        self.encode_categorical = encode_categorical
        self.categorical_features = categorical_features
        self.categorical_encoding = categorical_encoding
        self.ordinal_mapping = ordinal_mapping

        if self.encode_categorical:
            if not self.categorical_features:
                raise ValueError("Must provide categorical_features.")
            if not self.categorical_encoding:
                raise ValueError("Must provide categorical_encoding.")
            for f in self.categorical_features:
                if f not in self.categorical_encoding:
                    raise ValueError(f"No encoding for {f}.")
                if self.categorical_encoding[f] not in ['nominal','ordinal']:
                    raise ValueError(f"Invalid encoding for {f}.")
            ordinal_feats=[f for f in self.categorical_features if self.categorical_encoding[f]=='ordinal']
            if ordinal_feats and not self.ordinal_mapping:
                raise ValueError("Must provide ordinal_mapping for ordinal features.")
            for f in ordinal_feats:
                if f not in self.ordinal_mapping:
                    raise ValueError(f"No mapping for {f}.")

        # Define supported links
        self.supported_links = {
            'linear': {
                'identity': (self.identity, self.identity),
            },
            'logistic': {
                'logit': (self.logit, self.sigmoid),
                'probit': (self.probit, self.probit_inverse),
            },
            'poisson': {
                'log': (self.log, self.exp),
                'identity': (self.identity, self.identity),
            },
            'negative_binomial': {
                'log': (self.log, self.exp),
            },
            'gamma': {
                'inverse': (self.inverse_link_gamma, self.inverse_link_gamma_inv),
                'log': (self.log, self.exp),
            }
        }

        # Assign link and inverse link
        if isinstance(link, str):
            if self.family not in self.supported_links:
                raise ValueError(f"Unsupported family '{self.family}'")
            if link in self.supported_links[self.family]:
                self.link_func, self.inv_link_func = self.supported_links[self.family][link]
            else:
                raise ValueError(f"Unsupported link '{link}' for '{self.family}'.")
        elif callable(link):
            if inverse_link is None or not callable(inverse_link):
                raise ValueError("Must provide inverse_link if link is callable.")
            self.link_func = link
            self.inv_link_func = inverse_link
        else:
            raise ValueError("Invalid link specification.")

        self.dummy_columns = []
        self.ordinal_mappings = {}
        self.scaler_params_ = {}
        self.scaler_features_ = []
        self.feature_names_ = []

    @staticmethod
    def identity(mu):
        return mu

    @staticmethod
    def logit(mu):
        mu = np.clip(mu,1e-10,1-1e-10)
        return np.log(mu/(1-mu))

    @staticmethod
    def sigmoid(eta):
        return 1/(1+np.exp(-eta))

    @staticmethod
    def probit(mu):
        return norm.ppf(mu)

    @staticmethod
    def probit_inverse(eta):
        return norm.cdf(eta)

    @staticmethod
    def log(mu):
        mu = np.clip(mu,1e-10,None)
        return np.log(mu)

    @staticmethod
    def exp(eta):
        return np.exp(eta)

    @staticmethod
    def inverse_link_gamma(mu):
        mu=np.clip(mu,1e-10,None)
        return 1/mu

    @staticmethod
    def inverse_link_gamma_inv(eta):
        eta=np.clip(eta,1e-10,None)
        return 1/eta

    def variance(self, mu):
        if self.family=='linear':
            return 1
        elif self.family=='logistic':
            return mu*(1-mu)
        elif self.family=='poisson':
            return mu
        elif self.family=='negative_binomial':
            return mu+self.alpha_*mu**2
        elif self.family=='gamma':
            return (mu**2)/self.alpha_
        else:
            raise ValueError(f"Unsupported family '{self.family}'.")

    def fit(self, X, y):
        Xp = self._preprocess_data(X)
        beta, alpha = self._initialize_coefficients(Xp, y)

        for iteration in range(self.max_iter):
            eta = self._compute_eta(Xp, beta)
            mu = self._compute_mu(eta)
            W, z = self._compute_weights_z(mu, y, eta)

            beta_new = self._compute_wls_solution(Xp, W, z)

            if self.regularization in ['l1','elasticnet']:
                beta_new = self._apply_regularization(beta_new, Xp, W, z)

            if self.family in ['negative_binomial','gamma']:
                alpha_new = self._update_alpha(y, mu)
            else:
                alpha_new = alpha

            converged = self._check_convergence(beta, beta_new, alpha, alpha_new)
            beta, alpha = beta_new, alpha_new
            self.alpha_ = alpha_new

            if converged:
                if self.verbose:
                    print(f"Converged in {iteration+1} iterations.")
                break
        else:
            if self.verbose:
                print("Did not converge within max_iter.")

        self.beta_ = beta
        return self

    def _preprocess_data(self, X):
        if self.encode_categorical:
            if not isinstance(X, pd.DataFrame):
                raise ValueError("X must be DataFrame when encode_categorical=True.")
            X_encoded = self._encode_categorical(X)
        else:
            if isinstance(X, np.ndarray):
                X_encoded = pd.DataFrame(X, columns=[f'Feature_{i+1}' for i in range(X.shape[1])])
            elif isinstance(X, pd.DataFrame):
                X_encoded = X.copy()
            else:
                raise ValueError("X must be a DataFrame or ndarray.")

        # Cast to float right after encoding to ensure dummies are floats
        X_encoded = X_encoded.astype(float)

        if self.scale:
            Xp,_ = self._scale_features(X_encoded)
        else:
            Xp = X_encoded.copy()

        # Ensure columns are numeric
        for col in Xp.columns:
            if not np.issubdtype(Xp[col].dtype, np.number):
                Xp[col] = pd.to_numeric(Xp[col], errors='coerce').fillna(0.0)

        if 'const' not in Xp.columns:
            Xp = sm.add_constant(Xp)

        # Final cast to float to ensure all columns are float
        Xp = Xp.astype(float)
        self.feature_names_ = Xp.columns.tolist()
        return Xp

    def _initialize_coefficients(self, Xp, y):
        n_features = Xp.shape[1]
        beta = np.zeros(n_features)
        alpha = self.alpha_
        return beta, alpha

    def _compute_eta(self, X, beta):
        return X.values @ beta

    def _compute_mu(self, eta):
        if self.family=='gamma' and self.inv_link_func==self.inverse_link_gamma_inv:
            eta = np.clip(eta,1e-10,None)
        return self.inv_link_func(eta)

    def _compute_weights_z(self, mu, y, eta):
        if self.family=='linear':
            W = 1.0
            z = y
        else:
            W = self.variance(mu)
            W = np.maximum(W,1e-5)
            z = eta + (y - mu) / W
        return W,z

    def _compute_wls_solution(self, X, W, z):
        XT_W = X.values.T * W
        XTWX = XT_W @ X.values
        XTWz = XT_W @ z

        if self.regularization in ['l2','elasticnet']:
            if 'const' in self.feature_names_:
                idx_const = self.feature_names_.index('const')
            else:
                idx_const = None
            penalty_mat = np.eye(XTWX.shape[0])
            if idx_const is not None:
                penalty_mat[idx_const, idx_const] = 0.0
            if self.regularization=='l2':
                XTWX += self.reg_lambda * penalty_mat
            else:
                XTWX += self.reg_lambda * (1-self.reg_alpha) * penalty_mat

        try:
            beta_new = np.linalg.solve(XTWX, XTWz)
        except np.linalg.LinAlgError:
            beta_new = np.linalg.lstsq(XTWX, XTWz, rcond=None)[0]
        return beta_new

    def _apply_regularization(self, beta_new, X, W, z):
        if self.regularization=='l1':
            lambda_l1 = self.reg_lambda
        elif self.regularization=='elasticnet':
            lambda_l1 = self.reg_lambda * self.reg_alpha
        else:
            lambda_l1 = 0.0

        if lambda_l1 <= 0:
            return beta_new

        if self.l1_method=='soft_thresholding':
            if 'const' in self.feature_names_:
                idx_const = self.feature_names_.index('const')
            else:
                idx_const = None
            for i in range(len(beta_new)):
                if i == idx_const:
                    continue
                val = beta_new[i]
                if val > lambda_l1:
                    val = val - lambda_l1
                elif val < -lambda_l1:
                    val = val + lambda_l1
                else:
                    val = 0.0
                beta_new[i]=val
        elif self.l1_method=='coordinate_descent':
            beta_new = self.coordinate_descent(beta_new.copy(), X.values, W, z, self.reg_lambda, self.reg_alpha)
        else:
            raise ValueError("Invalid l1_method.")
        return beta_new

    def _update_alpha(self, y, mu):
        if self.family=='negative_binomial':
            alpha=self.alpha_
            d1=self._neg_binomial_alpha_d1(y, mu, alpha)
            eps=1e-6
            d1_p=self._neg_binomial_alpha_d1(y, mu, alpha+eps)
            d1_m=self._neg_binomial_alpha_d1(y, mu, max(alpha-eps,1e-8))
            d2=(d1_p - 2*d1 + d1_m)/(eps**2)
            alpha_new=alpha - d1/d2
            alpha_new=max(alpha_new,1e-8)
            return alpha_new
        elif self.family=='gamma':
            alpha=self.alpha_
            d1,d2=self._gamma_alpha_derivatives(y,mu,alpha)
            alpha_new=alpha - d1/d2
            alpha_new=max(alpha_new,1e-8)
            return alpha_new
        else:
            return self.alpha_

    def _neg_binomial_alpha_d1(self, y, mu, alpha):
        y = np.asarray(y)
        mu = np.asarray(mu)
        a = 1/alpha
        yplus = y+a
        dig_y_a = digamma(yplus)
        dig_a = digamma(a)

        term1 = (dig_y_a - dig_a)*(-1/alpha**2)
        term2 = (-1/alpha**2)*(-np.log(1+alpha*mu)) + (1/alpha)*(-mu/(1+alpha*mu))
        term3 = y*(1/alpha - mu/(1+alpha*mu))

        d1_i = term1+term2+term3
        return np.sum(d1_i)

    def _gamma_alpha_derivatives(self, y, mu, alpha):
        y=np.asarray(y)
        mu=np.asarray(mu)
        d1_i=(np.log(alpha)+1)-digamma(alpha)+np.log(y)-(y/mu)-np.log(mu)
        d1=np.sum(d1_i)
        n=len(y)
        d2 = n*(1/alpha - polygamma(1, alpha))
        return d1,d2

    def _check_convergence(self, beta, beta_new, alpha, alpha_new):
        delta_beta=np.linalg.norm(beta_new - beta)
        if self.family in ['negative_binomial','gamma']:
            delta_alpha=abs(alpha_new - alpha)
            if self.verbose:
                print(f"    ||Δβ||={delta_beta:.6e}, Δα={delta_alpha:.6e}")
            return (delta_beta<self.tol) and (delta_alpha<self.tol)
        else:
            if self.verbose:
                print(f"    ||Δβ||={delta_beta:.6e}")
            return (delta_beta<self.tol)

    def _encode_categorical(self, X):
        X_encoded=X.copy()
        for f in self.categorical_features:
            etype=self.categorical_encoding[f]
            if etype=='nominal':
                dummies=pd.get_dummies(X_encoded[f],prefix=f,drop_first=True)
                self.dummy_columns.extend(dummies.columns.tolist())
                X_encoded=pd.concat([X_encoded,dummies],axis=1)
                X_encoded.drop(columns=[f],inplace=True)
            elif etype=='ordinal':
                mapping={cat:i for i,cat in enumerate(self.ordinal_mapping[f],start=1)}
                self.ordinal_mappings[f]=mapping
                X_encoded[f]=X_encoded[f].map(mapping)
            else:
                raise ValueError("Invalid encoding.")
        if X_encoded.columns.duplicated().any():
            X_encoded = X_encoded.loc[:, ~X_encoded.columns.duplicated()]
        return X_encoded

    def _scale_features(self, X):
        # Distinguish continuous from categorical features
        all_categorical = set(self.dummy_columns) | set(self.ordinal_mappings.keys())
        numeric_cols = [c for c in X.columns if np.issubdtype(X[c].dtype, np.number)]
        continuous_feats = [c for c in numeric_cols if c not in all_categorical and c != 'const']

        X_scaled=X.copy()
        scaler_params_={}
        if self.scale and continuous_feats:
            if self.scaling_method=='standardization':
                scaler_params_['mean']=X_scaled[continuous_feats].mean()
                scaler_params_['std']=X_scaled[continuous_feats].std().replace(0,1)
                X_scaled[continuous_feats]=(X_scaled[continuous_feats]-scaler_params_['mean'])/scaler_params_['std']
            elif self.scaling_method=='minmax':
                scaler_params_['min']=X_scaled[continuous_feats].min()
                scaler_params_['max']=X_scaled[continuous_feats].max()
                denom=(scaler_params_['max']-scaler_params_['min']).replace(0,1)
                X_scaled[continuous_feats]=(X_scaled[continuous_feats]-scaler_params_['min'])/denom
        else:
            # No scaling or no continuous feats
            scaler_params_['mean']=pd.Series([],dtype=float)
            scaler_params_['std']=pd.Series([],dtype=float)

        self.scaler_params_=scaler_params_
        self.scaler_features_=continuous_feats
        return X_scaled,scaler_params_

    def _apply_scaling(self,X):
        continuous_feats = self.scaler_features_
        X_scaled=X.copy()
        if self.scale and continuous_feats:
            if self.scaling_method=='standardization':
                X_scaled[continuous_feats]=(X_scaled[continuous_feats]-self.scaler_params_['mean'])/self.scaler_params_['std']
            elif self.scaling_method=='minmax':
                denom=(self.scaler_params_['max']-self.scaler_params_['min']).replace(0,1)
                X_scaled[continuous_feats]=(X_scaled[continuous_feats]-self.scaler_params_['min'])/denom
        return X_scaled

    def _encode_new_data(self, X):
        X_encoded=X.copy()
        for f in self.categorical_features:
            etype=self.categorical_encoding[f]
            if etype=='nominal':
                dummies=pd.get_dummies(X_encoded[f],prefix=f,drop_first=True)
                missing_cols=set(self.dummy_columns)-set(dummies.columns)
                for c in missing_cols:
                    dummies[c]=0
                dummies=dummies.reindex(columns=self.dummy_columns,fill_value=0)
                X_encoded=pd.concat([X_encoded,dummies],axis=1)
                X_encoded.drop(columns=[f],inplace=True)
            elif etype=='ordinal':
                mapping=self.ordinal_mappings.get(f)
                if mapping is None:
                    raise ValueError(f"No mapping for {f}")
                X_encoded[f]=X_encoded[f].map(mapping)
                if X_encoded[f].isnull().any():
                    raise ValueError("Unseen category in ordinal feature.")

        if X_encoded.columns.duplicated().any():
            X_encoded=X_encoded.loc[:,~X_encoded.columns.duplicated()]

        return X_encoded

    def predict_mu(self, X):
        if self.encode_categorical:
            if not isinstance(X, pd.DataFrame):
                raise ValueError("X must be DataFrame for categorical.")
            X_encoded = self._encode_new_data(X)
        else:
            if isinstance(X, pd.DataFrame):
                X_encoded = X.copy()
            elif isinstance(X, np.ndarray):
                X_encoded = pd.DataFrame(X, columns=[f'Feature_{i+1}' for i in range(X.shape[1])])
            else:
                raise ValueError("X must be a DataFrame or ndarray.")

        X_scaled = self._apply_scaling(X_encoded)

        for col in X_scaled.columns:
            if not np.issubdtype(X_scaled[col].dtype, np.number):
                X_scaled[col] = pd.to_numeric(X_scaled[col], errors='coerce').fillna(0.0)

        if 'const' not in self.feature_names_:
            X_scaled = sm.add_constant(X_scaled)
        X_scaled = X_scaled.reindex(columns=self.feature_names_, fill_value=0)

        # Final cast to ensure floats
        X_scaled = X_scaled.astype(float)

        eta = X_scaled.values @ self.beta_
        if self.family == 'gamma' and self.inv_link_func == self.inverse_link_gamma_inv:
            eta = np.clip(eta, 1e-10, None)
        mu = self.inv_link_func(eta)
        return mu

    def predict(self, X, threshold=0.5):
        mu=self.predict_mu(X)
        if self.family=='linear':
            return mu
        elif self.family=='logistic':
            return (mu>=threshold).astype(int)
        elif self.family in ['poisson','negative_binomial']:
            return np.round(mu).astype(int)
        elif self.family=='gamma':
            return mu
        else:
            raise ValueError(f"Unsupported family {self.family}.")

    def coordinate_descent(self, beta_initial, X, W, z, reg_lambda, reg_alpha):
        beta=beta_initial.copy()
        n_features=X.shape[1]

        if 'const' in self.feature_names_:
            idx_const=self.feature_names_.index('const')
        else:
            idx_const=None

        lambda_l1=reg_lambda*reg_alpha if self.regularization=='elasticnet' else reg_lambda

        for iteration in range(self.max_cd_iter):
            beta_old=beta.copy()
            for j in range(n_features):
                X_j=X[:,j]
                r_j=z-(X@beta)+beta[j]*X_j
                rho_j=(X_j*W*r_j).sum()
                denom=(X_j**2*W).sum()
                denom=max(denom,1e-8)

                if j==idx_const:
                    beta_j=rho_j/denom
                else:
                    if rho_j>lambda_l1:
                        beta_j=(rho_j-lambda_l1)/denom
                    elif rho_j<-lambda_l1:
                        beta_j=(rho_j+lambda_l1)/denom
                    else:
                        beta_j=0.0
                beta[j]=beta_j
            delta=np.linalg.norm(beta-beta_old)
            if self.verbose:
                print(f"    CD iter {iteration+1}: ||Δβ||={delta:.6e}")
            if delta<self.cd_tol:
                if self.verbose:
                    print("    CD converged.")
                break
        return beta

# Regression Demos

In [2]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import warnings

warnings.filterwarnings("ignore")

def generate_linear_data(n_samples=500, n_features=3, noise=1.0, random_seed=42):
    """Generate a synthetic linear-regression data set.

    Features are standard-normal. The response is
    ``10.0 + X @ true_beta + noise * standard-normal``, where ``true_beta``
    cycles through (3.5, -2.0, 1.0) so that any ``n_features`` works; with
    the default of three features the coefficients are exactly
    (3.5, -2.0, 1.0).

    Args:
        n_samples: Number of rows.
        n_features: Number of feature columns ('Feature_1', ...).
        noise: Standard deviation of the additive noise.
        random_seed: Seed passed to ``np.random.seed`` (global NumPy state).

    Returns:
        Tuple ``(X, y)`` of a DataFrame and a 1-D array.
    """
    np.random.seed(random_seed)
    X = pd.DataFrame(
        np.random.randn(n_samples, n_features),
        columns=[f'Feature_{i+1}' for i in range(n_features)]
    )
    # np.resize repeats the base pattern to the requested length.
    true_beta = np.resize(np.array([3.5, -2.0, 1.0]), n_features)
    intercept = 10.0
    y = intercept + X.values @ true_beta + np.random.randn(n_samples) * noise
    return X, y

def generate_logistic_data(n_samples=500, random_seed=42):
    """Generate a synthetic binary-response data set with categorical features.

    The features are 'Gender', 'Department', 'Experience' (integers 1-20)
    and 'Education'. The linear predictor is built from a fixed table of
    coefficients applied to the dummy-encoded features (first level
    dropped) plus 'Experience'; coefficients whose dummy column does not
    exist are applied to a column of zeros. The response is drawn from a
    Bernoulli distribution with the logistic transform of that predictor.

    Args:
        n_samples: Number of rows.
        random_seed: Seed passed to ``np.random.seed`` (global NumPy state).

    Returns:
        Tuple ``(X, y)``: the raw (un-encoded) DataFrame and the 0/1
        responses.
    """
    np.random.seed(random_seed)
    # Draw order matters for reproducibility: Gender, Department,
    # Experience, Education.
    gender = np.random.choice(['Male','Female'], size=n_samples)
    department = np.random.choice(['Sales','Engineering','HR'], size=n_samples)
    experience = np.random.randint(1,21,size=n_samples)
    education = np.random.choice(['High School','Bachelor','Master','PhD'], size=n_samples)
    X = pd.DataFrame({
        'Gender': gender,
        'Department': department,
        'Experience': experience,
        'Education': education
    })

    intercept = -1.0
    effects = {
        'Gender_Male': 0.5,
        'Department_Engineering': 1.0,
        'Department_HR':0.75,
        'Education_Bachelor':0.2,
        'Education_Master':0.4,
        'Education_PhD':0.6,
        'Experience':0.1
    }

    # NOTE(review): with drop_first=True the first level of each feature
    # gets no dummy column, so a coefficient listed for such a level
    # multiplies the zero column added below - confirm this is intended.
    X_encoded = pd.get_dummies(X, drop_first=True)
    for name in effects:
        if name != 'Experience' and name not in X_encoded.columns:
            X_encoded[name] = 0

    eta = intercept
    for name, coefficient in effects.items():
        eta = eta + coefficient * X_encoded[name]

    p = 1/(1+np.exp(-eta))
    y = np.random.binomial(1,p)
    return X,y

def generate_poisson_data(n_samples=500,n_features=3,random_seed=42):
    """Generate a synthetic Poisson-regression data set with a log link.

    Features are standard-normal. Counts are drawn from a Poisson
    distribution with mean ``exp(1.5 + X @ true_beta)``, where
    ``true_beta`` cycles through (0.8, -1.2, 0.5) so that any
    ``n_features`` works; with the default of three features the
    coefficients are exactly (0.8, -1.2, 0.5).

    Args:
        n_samples: Number of rows.
        n_features: Number of feature columns ('Feature_1', ...).
        random_seed: Seed passed to ``np.random.seed`` (global NumPy state).

    Returns:
        Tuple ``(X, y)`` of a DataFrame and an array of counts.
    """
    np.random.seed(random_seed)
    X = pd.DataFrame(
        np.random.randn(n_samples, n_features),
        columns=[f'Feature_{i+1}' for i in range(n_features)]
    )
    # np.resize repeats the base pattern to the requested length.
    true_beta = np.resize(np.array([0.8, -1.2, 0.5]), n_features)
    intercept = 1.5
    eta = intercept + X.values @ true_beta
    mu = np.exp(eta)
    y = np.random.poisson(mu)
    return X, y

def generate_negative_binomial_data(n_samples=500,n_features=3,alpha=1.5,random_seed=42):
    """Generate a synthetic negative-binomial data set with a log link.

    Features are standard-normal. With ``mu = exp(1.0 + X @ true_beta)``,
    counts are drawn from ``np.random.negative_binomial(n, p)`` using
    ``n = 1 / alpha`` and ``p = n / (n + mu)``. ``true_beta`` cycles
    through (0.5, -1.0, 0.3) so that any ``n_features`` works; with the
    default of three features the coefficients are exactly
    (0.5, -1.0, 0.3).

    Args:
        n_samples: Number of rows.
        n_features: Number of feature columns ('Feature_1', ...).
        alpha: Dispersion used to derive ``n``.
        random_seed: Seed passed to ``np.random.seed`` (global NumPy state).

    Returns:
        Tuple ``(X, y)`` of a DataFrame and an array of counts.
    """
    np.random.seed(random_seed)
    X = pd.DataFrame(
        np.random.randn(n_samples, n_features),
        columns=[f'Feature_{i+1}' for i in range(n_features)]
    )
    # np.resize repeats the base pattern to the requested length.
    true_beta = np.resize(np.array([0.5, -1.0, 0.3]), n_features)
    intercept = 1.0
    eta = intercept + X.values @ true_beta
    mu = np.exp(eta)
    n = 1 / alpha
    p = n / (n + mu)
    y = np.random.negative_binomial(n, p)
    return X, y

def generate_gamma_data(n_samples=500,n_features=3,random_seed=42):
    """Generate a synthetic gamma-distributed data set with a reciprocal link.

    The features are 'Age' (integers 20-59), 'Income' and 'Debt' (normal
    draws). The mean is ``1 / eta`` with
    ``eta = 0.02*Age + 0.0001*Income + 0.0003*Debt`` and the response is
    drawn from a gamma distribution with shape 2.0 and scale ``mean / 2.0``.

    Args:
        n_samples: Number of rows.
        n_features: Accepted but not used by this generator.
        random_seed: Seed passed to ``np.random.seed`` (global NumPy state).

    Returns:
        Tuple ``(X, y)`` of the feature DataFrame and the responses.
    """
    np.random.seed(random_seed)
    # Draw order matters for reproducibility: Age, Income, Debt.
    age = np.random.randint(20,60,size=n_samples)
    income = np.random.normal(50000,15000,size=n_samples)
    debt = np.random.normal(15000,5000,size=n_samples)
    X = pd.DataFrame({'Age': age, 'Income': income, 'Debt': debt})

    eta = 0.02*X['Age']+0.0001*X['Income']+0.0003*X['Debt']
    mean = 1/eta
    shape = 2.0
    y = np.random.gamma(shape=shape,scale=mean/shape)
    return X,y

def compare_coefficients(irls_coeffs, sm_coeffs, model_type):
    """Print the two coefficient sets side by side under a ``model_type`` heading."""
    header = f"\n--- {model_type} Regression Coefficients Comparison ---"
    table = pd.DataFrame({
        'IRLSRegression': irls_coeffs,
        'StatsModels': sm_coeffs
    })
    print(header)
    print(table)
    print("\n")

def linear_regression_workflow():
    """Fit the linear demo data with IRLSRegression and statsmodels OLS and print both coefficient sets."""
    print("=== Linear Regression ===")
    features, target = generate_linear_data()

    model = IRLSRegression(
        family='linear',
        link='identity',
        regularization='none',
        scale=False,
        encode_categorical=False,
        verbose=False
    )
    model.fit(features, target)

    reference = sm.OLS(target, sm.add_constant(features)).fit()

    compare_coefficients(
        pd.Series(model.beta_, index=model.feature_names_),
        reference.params,
        "Linear"
    )

def logistic_regression_workflow():
    """Fit the logistic demo data with elastic-net IRLSRegression and unregularized statsmodels Logit, then print both coefficient sets."""
    print("=== Logistic Regression ===")
    features, target = generate_logistic_data()

    nominal_features = ['Gender','Department','Education']
    encoding = {
        'Gender':'nominal',
        'Department':'nominal',
        'Education':'nominal'
    }
    model = IRLSRegression(
        family='logistic',
        link='logit',
        regularization='elasticnet',
        reg_lambda=0.1,
        reg_alpha=0.7,
        l1_method='coordinate_descent',
        scale=False,
        encode_categorical=True,
        categorical_features=nominal_features,
        categorical_encoding=encoding,
        verbose=False
    )
    model.fit(features, target)

    # Reference fit on the same dummy encoding, cast to float for statsmodels.
    design = sm.add_constant(pd.get_dummies(features, drop_first=True)).astype(float)
    target = target.astype(float)
    reference = sm.Logit(target, design).fit(disp=0)

    compare_coefficients(
        pd.Series(model.beta_, index=model.feature_names_),
        reference.params,
        "Logistic (ElasticNet IRLS vs Unregularized SM)"
    )

def poisson_regression_workflow():
    """Fit the Poisson demo data with IRLSRegression and statsmodels GLM and print both coefficient sets."""
    print("=== Poisson Regression ===")
    features, counts = generate_poisson_data()

    model = IRLSRegression(
        family='poisson',
        link='log',
        regularization='none',
        scale=False,
        encode_categorical=False,
        verbose=False
    )
    model.fit(features, counts)

    reference = sm.GLM(
        counts, sm.add_constant(features), family=sm.families.Poisson()
    ).fit()

    compare_coefficients(
        pd.Series(model.beta_, index=model.feature_names_),
        reference.params,
        "Poisson"
    )

def negative_binomial_regression_workflow():
    """Fit the negative-binomial demo data with IRLSRegression and statsmodels GLM and print both coefficient sets."""
    print("=== Negative Binomial Regression ===")
    features, counts = generate_negative_binomial_data()

    model = IRLSRegression(
        family='negative_binomial',
        link='log',
        regularization='none',
        scale=False,
        encode_categorical=False,
        verbose=False
    )
    model.fit(features, counts)

    # The reference model is given alpha=1.5, the generator's default.
    reference_family = sm.families.NegativeBinomial(alpha=1.5)
    reference = sm.GLM(counts, sm.add_constant(features), family=reference_family).fit()

    compare_coefficients(
        pd.Series(model.beta_, index=model.feature_names_),
        reference.params,
        "Negative Binomial"
    )

def gamma_regression_workflow():
    """Fit the gamma demo data with IRLSRegression and statsmodels GLM (inverse-power link) and print both coefficient sets."""
    print("=== Gamma Regression ===")
    features, response = generate_gamma_data()

    model = IRLSRegression(
        family='gamma',
        link='inverse',
        regularization='none',
        scale=False,
        encode_categorical=False,
        verbose=False
    )
    model.fit(features, response)

    reference_family = sm.families.Gamma(link=sm.families.links.inverse_power())
    reference = sm.GLM(response, sm.add_constant(features), family=reference_family).fit()

    compare_coefficients(
        pd.Series(model.beta_, index=model.feature_names_),
        reference.params,
        "Gamma"
    )

def main():
    """Run every regression demo in order: linear, logistic, Poisson, negative binomial, gamma."""
    workflows = (
        linear_regression_workflow,
        logistic_regression_workflow,
        poisson_regression_workflow,
        negative_binomial_regression_workflow,
        gamma_regression_workflow,
    )
    for run_workflow in workflows:
        run_workflow()

if __name__=='__main__':
    main()

=== Linear Regression ===

--- Linear Regression Coefficients Comparison ---
           IRLSRegression  StatsModels
const           10.037212    10.037212
Feature_1        3.478160     3.478160
Feature_2       -2.054121    -2.054121
Feature_3        0.949873     0.949873


=== Logistic Regression ===

--- Logistic (ElasticNet IRLS vs Unregularized SM) Regression Coefficients Comparison ---
                       IRLSRegression  StatsModels
const                       -0.621151    -0.626555
Experience                   0.146704     0.146833
Gender_Male                  0.031268     0.034275
Department_HR                0.799800     0.803787
Department_Sales            -0.550585    -0.553149
Education_High School       -0.369739    -0.369628
Education_Master             0.319290     0.326127
Education_PhD                0.461118     0.468041


=== Poisson Regression ===

--- Poisson Regression Coefficients Comparison ---
           IRLSRegression  StatsModels
const            1.460261   

Inconsistencies are still present in the negative binomial and gamma regression results. The IRLS update needs further study for families that have a dispersion parameter.

# Encoding Demos

In [3]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import warnings

warnings.filterwarnings("ignore")

def generate_categorical_data(n_samples=100, random_seed=42):
    """Generate a demo DataFrame with categorical and numeric columns.

    Columns: 'Gender', 'Department', 'Education' (random category labels),
    'Experience' (integers 1-20) and 'Salary' (normal draws).

    Args:
        n_samples: Number of rows.
        random_seed: Seed passed to ``np.random.seed`` (global NumPy state).

    Returns:
        The generated DataFrame.
    """
    np.random.seed(random_seed)
    # Draw order matters for reproducibility: Gender, Department,
    # Education, Experience, Salary.
    gender = np.random.choice(['Male', 'Female'], size=n_samples)
    department = np.random.choice(['Sales', 'Engineering', 'HR', 'Marketing'], size=n_samples)
    education = np.random.choice(['High School', 'Bachelor', 'Master', 'PhD'], size=n_samples)
    experience = np.random.randint(1, 21, size=n_samples)
    salary = np.random.normal(60000, 15000, size=n_samples)
    return pd.DataFrame({
        'Gender': gender,
        'Department': department,
        'Education': education,
        'Experience': experience,
        'Salary': salary
    })

def display_encoding_details(model, model_type):
    """Print the dummy columns and ordinal mappings recorded on ``model``.

    When ``model.encode_categorical`` is falsy, a single line stating that
    no encoding was applied is printed instead.
    """
    print(f"\n--- {model_type} Regression Encoding Details ---")
    if not model.encode_categorical:
        print("No categorical encoding was applied.")
    else:
        print("Dummy Variables Created (Nominal Features):")
        print(model.dummy_columns)
        print("\nOrdinal Mappings:")
        for feature in model.ordinal_mappings:
            print(f"  {feature}: {model.ordinal_mappings[feature]}")
    print("\n")

def _show_frame(frame):
    """Render ``frame`` with IPython's ``display`` if it exists, else ``print``.

    ``display`` is only defined inside IPython/Jupyter sessions; falling back
    to ``print`` keeps the demo runnable as a plain script.
    """
    try:
        renderer = display  # noqa: F821 - injected by IPython/Jupyter
    except NameError:
        renderer = print
    renderer(frame)


def _replicate_preprocessing(X, model, nominal_features, ordinal_mapping):
    """Rebuild by hand the encoded and scaled design matrix for ``X``.

    This is a manual re-implementation meant to mirror the fitted model's
    preprocessing, using only the attributes the model exposes.

    Parameters
    ----------
    X : pandas.DataFrame
        Raw feature frame (categorical columns still hold labels).
    model : object
        Fitted model; ``scale``, ``scaler_params_``, ``scaler_features_`` and
        ``feature_names_`` are read from it.
    nominal_features : list of str
        Columns to dummy-encode (first level dropped).
    ordinal_mapping : dict
        Maps an ordinal column name to its categories in increasing order.

    Returns
    -------
    pandas.DataFrame
        All-float frame with a ``const`` column, ordered like
        ``model.feature_names_``.
    """
    encoded = X.copy()

    # Nominal encoding: replace each column by its dummies, appended at the end.
    for feature in nominal_features:
        dummies = pd.get_dummies(encoded[feature], prefix=feature, drop_first=True)
        encoded = pd.concat([encoded.drop(columns=[feature]), dummies], axis=1)

    # Ordinal encoding: categories become 1-based integer ranks.
    for feature, categories in ordinal_mapping.items():
        ranks = {category: rank for rank, category in enumerate(categories, start=1)}
        encoded[feature] = encoded[feature].map(ranks)

    # get_dummies may yield boolean columns; work in float from here on.
    encoded = encoded.astype(float)

    scaler_mean = model.scaler_params_.get('mean', pd.Series(dtype=float))
    scaler_std = model.scaler_params_.get('std', pd.Series(dtype=float))
    scaler_features = model.scaler_features_

    scaled = encoded.copy()
    # len() rather than truthiness: scaler_features_ may be an array-like
    # whose boolean value is ambiguous.
    if model.scale and len(scaler_features) > 0:
        for col in scaler_features:
            scaled[col] = (scaled[col] - scaler_mean[col]) / scaler_std[col]

    # Add the intercept column and align column order with the fitted model;
    # columns the model knows but this frame lacks are filled with 0.
    scaled = sm.add_constant(scaled)
    scaled = scaled.reindex(columns=model.feature_names_, fill_value=0)

    # Cast again so that every column (including filled ones) is float.
    return scaled.astype(float)


def encoding_workflow():
    """Demonstrate categorical encoding, scaling and prediction with IRLSRegression.

    Steps:
      1. Generate a 200-row synthetic dataset and print its head.
      2. Fit a linear/identity ``IRLSRegression`` with standardization and
         mixed nominal/ordinal encoding.
      3. Print the model's feature names and encoding details.
      4. Rebuild the encoded and scaled feature matrix by hand and show it.
      5. Call ``predict_mu`` on three small frames: regular data, data holding
         a ``Department`` value absent from training, and another regular frame.

    Returns
    -------
    None
        All results are printed (or displayed when run under IPython).
    """
    print("=== Encoding Capabilities Demonstration ===")
    df = generate_categorical_data(n_samples=200)
    print("Sample of the Original Dataset:")
    print(df.head())

    X = df[['Gender', 'Department', 'Education', 'Experience']]
    y = df['Salary']

    categorical_encoding = {
        'Gender': 'nominal',
        'Department': 'nominal',
        'Education': 'ordinal'
    }
    ordinal_mapping = {
        'Education': ['High School', 'Bachelor', 'Master', 'PhD']
    }
    # Derive the nominal list from the single source of truth above so the
    # manual replication cannot drift from what the model was told.
    nominal_features = [
        feature for feature, kind in categorical_encoding.items()
        if kind == 'nominal'
    ]

    irls_model = IRLSRegression(
        family='linear',
        link='identity',
        regularization='none',
        scale=True,
        scaling_method='standardization',
        encode_categorical=True,
        categorical_features=list(categorical_encoding),
        categorical_encoding=categorical_encoding,
        ordinal_mapping=ordinal_mapping,
        verbose=True
    )
    irls_model.fit(X, y)

    print("\nTransformed Feature Names After Encoding and Scaling:")
    print(irls_model.feature_names_)
    display_encoding_details(irls_model, "Linear")

    # Manual replication of the model's preprocessing, for visual comparison.
    X_scaled = _replicate_preprocessing(
        X, irls_model, nominal_features, ordinal_mapping
    )

    print("\nSample of the Encoded and Scaled Feature Set:")
    # Show more rows than the default head().
    with pd.option_context('display.max_rows', 20, 'display.max_columns', None):
        _show_frame(X_scaled.head(20))

    print("\n--- Making Predictions on New Data ---")
    new_data = pd.DataFrame({
        'Gender': ['Female', 'Male'],
        'Department': ['Engineering', 'HR'],
        'Education': ['Master', 'Bachelor'],
        'Experience': [5, 15]
    })
    print("\nNew Data for Prediction:")
    print(new_data)
    # NOTE(review): the output recorded for this demo shows predictions of a
    # few thousand at most, while salaries are drawn around 60000 - the way
    # predict_mu applies the intercept/scaling should be verified.
    predicted_salary = irls_model.predict_mu(new_data)
    print("\nPredicted Salary (Expected Mean) for New Data:")
    print(predicted_salary)

    print("\n--- Handling Unseen Categories During Prediction ---")
    new_data_unseen = pd.DataFrame({
        'Gender': ['Female', 'Male'],
        'Department': ['Finance', 'HR'],
        'Education': ['Master', 'Bachelor'],
        'Experience': [5, 15]
    })
    print("\nNew Data with Unseen Category for 'Department':")
    print(new_data_unseen)
    # Only the prediction call is guarded: a ValueError here is reported
    # rather than aborting the rest of the demo.
    try:
        predicted_salary_unseen = irls_model.predict_mu(new_data_unseen)
    except ValueError as e:
        print(f"\nError during prediction with unseen categories: {e}")
    else:
        print("\nPredicted Salary for New Data with Unseen Categories:")
        print(predicted_salary_unseen)

    print("\n--- Making Predictions on Consistent New Data ---")
    new_data_consistent = pd.DataFrame({
        'Gender': ['Female', 'Male'],
        'Department': ['Engineering', 'HR'],
        'Education': ['PhD', 'High School'],
        'Experience': [10, 3]
    })
    print("\nNew Consistent Data for Prediction:")
    print(new_data_consistent)
    predicted_salary_consistent = irls_model.predict_mu(new_data_consistent)
    print("\nPredicted Salary for Consistent New Data:")
    print(predicted_salary_consistent)

# Run the encoding demonstration only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    encoding_workflow()

=== Encoding Capabilities Demonstration ===
Sample of the Original Dataset:
   Gender Department    Education  Experience        Salary
0    Male         HR          PhD          13  80382.249500
1  Female  Marketing          PhD          20  55704.802258
2    Male         HR       Master          15  60684.117599
3    Male      Sales  High School           3  65837.693642
4    Male  Marketing          PhD           8  55988.990432
    ||Δβ||=5.865498e+04
    ||Δβ||=0.000000e+00
Converged in 2 iterations.

Transformed Feature Names After Encoding and Scaling:
['const', 'Education', 'Experience', 'Gender_Male', 'Department_HR', 'Department_Marketing', 'Department_Sales']

--- Linear Regression Encoding Details ---
Dummy Variables Created (Nominal Features):
['Gender_Male', 'Department_HR', 'Department_Marketing', 'Department_Sales']

Ordinal Mappings:
  Education: {'High School': 1, 'Bachelor': 2, 'Master': 3, 'PhD': 4}



Sample of the Encoded and Scaled Feature Set:


Unnamed: 0,const,Education,Experience,Gender_Male,Department_HR,Department_Marketing,Department_Sales
0,1.0,4.0,0.405622,1.0,1.0,0.0,0.0
1,1.0,4.0,1.591155,0.0,0.0,1.0,0.0
2,1.0,3.0,0.744346,1.0,1.0,0.0,0.0
3,1.0,1.0,-1.287998,1.0,0.0,0.0,1.0
4,1.0,4.0,-0.441188,1.0,0.0,1.0,0.0
5,1.0,4.0,1.591155,0.0,0.0,0.0,1.0
6,1.0,2.0,0.913708,1.0,0.0,1.0,0.0
7,1.0,4.0,0.405622,1.0,0.0,1.0,0.0
8,1.0,4.0,1.252432,1.0,0.0,0.0,0.0
9,1.0,2.0,-0.102464,0.0,0.0,0.0,1.0



--- Making Predictions on New Data ---

New Data for Prediction:
   Gender   Department Education  Experience
0  Female  Engineering    Master           5
1    Male           HR  Bachelor          15

Predicted Salary (Expected Mean) for New Data:
[-3326.2037955   2793.30054855]

--- Handling Unseen Categories During Prediction ---

New Data with Unseen Category for 'Department':
   Gender Department Education  Experience
0  Female    Finance    Master           5
1    Male         HR  Bachelor          15

Predicted Salary for New Data with Unseen Categories:
[-3326.2037955   2793.30054855]

--- Making Predictions on Consistent New Data ---

New Consistent Data for Prediction:
   Gender   Department    Education  Experience
0  Female  Engineering          PhD          10
1    Male           HR  High School           3

Predicted Salary for Consistent New Data:
[-2217.22042314  -575.91467958]
