# Chapter 13: General Linear Models

In [1]:
import numpy as np
import pandas as pd

import scipy.stats
import statsmodels.api as sm



## Binary Outcomes

In [2]:
admit = pd.read_csv("../data/admit.csv")
print(admit.dtypes)
admit.head(6)

admit      int64
gre        int64
gpa      float64
rank       int64
dtype: object


Unnamed: 0,admit,gre,gpa,rank
0,0,380,3.61,3
1,1,660,3.67,3
2,1,800,4.0,1
3,1,640,3.19,4
4,0,520,2.93,4
5,1,760,3.0,2


### Baseline Model

In [3]:
def LLbinary(pi):
    p = np.where(admit.admit == 1, pi, 1-pi)
    LL = np.sum(np.log(p))
    return(-1*LL)
res1 = scipy.optimize.minimize(LLbinary, x0 = .5, method = 'Nelder-Mead')
print(res1)

 final_simplex: (array([[0.31748047],
       [0.31757812]]), array([249.98825913, 249.98826441]))
           fun: 249.98825912954734
       message: 'Optimization terminated successfully.'
          nfev: 26
           nit: 13
        status: 0
       success: True
             x: array([0.31748047])

### Logistic Regression

In [4]:
def LLbinary(params):
    b0, b1, b2, b3 = params
    X = b0 + b1*admit['gre'] + b2*admit['gpa'] + b3*admit['rank']
    pi = np.exp(X) / (1+np.exp(X))
    p = np.where(admit.admit == 1, pi, 1-pi)
    LL = np.sum(np.log(p))
    return(-1*LL)
res2 = scipy.optimize.minimize(LLbinary, x0 = [0, 0, 0, 0], method = 'Nelder-Mead')
print(res2)

 final_simplex: (array([[-0.08354163,  0.00162173, -0.00901617, -0.66485216],
       [-0.08352002,  0.00162124, -0.00896192, -0.66478994],
       [-0.08352163,  0.0016213 , -0.00892396, -0.66488047],
       [-0.08351163,  0.00162096, -0.0089236 , -0.66479992],
       [-0.08354051,  0.00162178, -0.00898158, -0.66491433]]), array([234.321968  , 234.32196803, 234.3219681 , 234.32196815,
       234.32196821]))
           fun: 234.3219680023034
       message: 'Optimization terminated successfully.'
          nfev: 413
           nit: 232
        status: 0
       success: True
             x: array([-0.08354163,  0.00162173, -0.00901617, -0.66485216])

### Probit Regression

In [5]:
def LLbinary(params):
    b0, b1, b2, b3 = params
    X = b0 + b1*admit['gre'] + b2*admit['gpa'] + b3*admit['rank']
    pi = scipy.stats.norm.cdf(X)
    p = np.where(admit.admit == 1, pi, 1-pi)
    LL = np.sum(np.log(p))
    return(-1*LL)
res3 = scipy.optimize.minimize(LLbinary, x0 = [0, 0, 0, 0], method = 'Nelder-Mead')
print(res3)

 final_simplex: (array([[ 0.07532397,  0.00075881, -0.00146091, -0.40348545],
       [ 0.07534139,  0.00075917, -0.00155737, -0.40340469],
       [ 0.07530224,  0.00075851, -0.00138884, -0.40349914],
       [ 0.07534438,  0.00075928, -0.00151075, -0.40350541],
       [ 0.07530275,  0.00075866, -0.00142106, -0.40344374]]), array([235.06691095, 235.06691117, 235.06691119, 235.06691123,
       235.06691171]))
           fun: 235.06691094815426
       message: 'Optimization terminated successfully.'
          nfev: 365
           nit: 215
        status: 0
       success: True
             x: array([ 0.07532397,  0.00075881, -0.00146091, -0.40348545])

## Count Outcomes

In [6]:
student = pd.read_csv("../data/student.csv")
print(student.dtypes)
student.head(6)

id         int64
gender     int64
math       int64
prog       int64
daysabs    int64
dtype: object


Unnamed: 0,id,gender,math,prog,daysabs
0,1001,0,63,2,4
1,1002,0,27,2,4
2,1003,1,20,2,2
3,1004,1,16,2,3
4,1005,1,2,2,3
5,1006,1,71,2,13


### Baseline Model

In [7]:
def LLpois(lam):
    p = scipy.stats.poisson.pmf(student['daysabs'], lam)
    LL = np.sum(np.log(p))
    return(-1*LL)
res4 = scipy.optimize.minimize(LLpois, x0 = 10, method = 'Nelder-Mead')
print(res4)

 final_simplex: (array([[5.95544434],
       [5.9553833 ]]), array([1550.50922948, 1550.50922948]))
           fun: 1550.5092294752835
       message: 'Optimization terminated successfully.'
          nfev: 36
           nit: 18
        status: 0
       success: True
             x: array([5.95544434])

### Poisson Regression

In [8]:
def LLpois(params):
    b0, b1, b2, b3 = params
    X = b0 + b1*student['gender'] + b2*student['math'] + b3*student['prog']
    lam = np.exp(X)
    p = scipy.stats.poisson.pmf(student['daysabs'], lam)
    LL = np.sum(np.log(p))
    return(-1*LL)
res5 = scipy.optimize.minimize(LLpois, x0 = [np.log(5.96), 0, 0, 0], method = 'Nelder-Mead')
print(res5)

 final_simplex: (array([[ 3.25203163,  0.23840155, -0.00763839, -0.60657528],
       [ 3.2519947 ,  0.23839695, -0.00763828, -0.60656324],
       [ 3.25196906,  0.2383763 , -0.00763896, -0.60650955],
       [ 3.25196358,  0.238387  , -0.00763697, -0.6065396 ],
       [ 3.25211297,  0.23840687, -0.00763931, -0.60658789]]), array([1324.39470781, 1324.39470805, 1324.39470866, 1324.39470924,
       1324.39470944]))
           fun: 1324.3947078083743
       message: 'Optimization terminated successfully.'
          nfev: 254
           nit: 150
        status: 0
       success: True
             x: array([ 3.25203163,  0.23840155, -0.00763839, -0.60657528])

## Functions in R and Python

In [9]:
res6 = sm.formula.glm("admit ~ gre + gpa + rank", family=sm.families.Binomial(), data=admit).fit()
res6.summary()

0,1,2,3
Dep. Variable:,admit,No. Observations:,400.0
Model:,GLM,Df Residuals:,396.0
Model Family:,Binomial,Df Model:,3.0
Link Function:,Logit,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-229.72
Date:,"Sat, 16 Jul 2022",Deviance:,459.44
Time:,21:35:40,Pearson chi2:,399.0
No. Iterations:,4,Pseudo R-squ. (CS):,0.09637
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-3.4495,1.133,-3.045,0.002,-5.670,-1.229
gre,0.0023,0.001,2.101,0.036,0.000,0.004
gpa,0.7770,0.327,2.373,0.018,0.135,1.419
rank,-0.5600,0.127,-4.405,0.000,-0.809,-0.311


In [10]:
res7 = sm.formula.glm("daysabs ~ gender + math + prog", family=sm.families.Poisson(), data=student).fit()
res7.summary()

0,1,2,3
Dep. Variable:,daysabs,No. Observations:,314.0
Model:,GLM,Df Residuals:,310.0
Model Family:,Poisson,Df Model:,3.0
Link Function:,Log,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-1324.4
Date:,"Sat, 16 Jul 2022",Deviance:,1765.5
Time:,21:35:40,Pearson chi2:,2030.0
No. Iterations:,5,Pseudo R-squ. (CS):,0.7631
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,3.2548,0.081,40.008,0.000,3.095,3.414
gender,0.2355,0.047,5.039,0.000,0.144,0.327
math,-0.0076,0.001,-8.270,0.000,-0.009,-0.006
prog,-0.6073,0.036,-16.779,0.000,-0.678,-0.536
