# Chapter 13: General Linear Models

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import scipy.stats

In [2]:
admit = pd.read_csv("../data/admit.csv")
admit.head(6)

Unnamed: 0,admit,gre,gpa,rank
0,0,380,3.61,3
1,1,660,3.67,3
2,1,800,4.0,1
3,1,640,3.19,4
4,0,520,2.93,4
5,1,760,3.0,2


## Baseline Model

In [3]:
def LLbinary(pi):
    p = np.where(admit.admit == 1, pi, 1-pi)
    LL = np.sum(np.log(p))
    return(-1*LL)
constraint = ({'type': 'eq', 'fun': lambda x: x[0]+(1-x[0])})
res1 = scipy.optimize.minimize(LLbinary, x0 = .5, constraints=constraint)
print(res1)

     fun: 249.98825877745776
     jac: array([0.])
 message: 'Positive directional derivative for linesearch'
    nfev: 29
     nit: 12
    njev: 8
  status: 8
 success: False
       x: array([0.31749999])


  LL = np.sum(np.log(p))
  LL = np.sum(np.log(p))
  LL = np.sum(np.log(p))


## Logistic Regression

In [4]:
def LLbinary(params):
    b0, b1, b2, b3 = params
    X = b0 + b1*admit['gre'] + b2*admit['gpa'] + b3*admit['rank']
    pi = np.exp(X)/(1+np.exp(X))
    p = np.where(admit.admit == 1, pi, 1-pi)
    LL = np.sum(np.log(p))
    return(-1*LL)
constraint = ({'type': 'eq', 'fun': lambda x: x[0]+(1-x[0])})
res2 = scipy.optimize.minimize(LLbinary, x0 = [0, 0, 0, 0], constraints=constraint)
print(res2)

     fun: 277.2588722239781
     jac: array([   73.        , 38940.26725769,   234.85000992,   224.00000763])
 message: 'Singular matrix C in LSQ subproblem'
    nfev: 5
     nit: 1
    njev: 1
  status: 6
 success: False
       x: array([0., 0., 0., 0.])


## Probit Regression

In [5]:
def LLbinary(params):
    b0, b1, b2, b3 = params
    X = b0 + b1*admit['gre'] + b2*admit['gpa'] + b3*admit['rank']
    pi = scipy.stats.norm.cdf(X)
    p = np.where(admit.admit == 1, pi, 1-pi)
    LL = np.sum(np.log(p))
    return(-1*LL)
constraint = ({'type': 'eq', 'fun': lambda x: x[0]+(1-x[0])})
res3 = scipy.optimize.minimize(LLbinary, x0 = [0, 0, 0, 0], constraints=constraint)
print(res3)

     fun: 277.2588722239781
     jac: array([  116.49115372, 62139.93015289,   374.7664032 ,   357.45230103])
 message: 'Singular matrix C in LSQ subproblem'
    nfev: 5
     nit: 1
    njev: 1
  status: 6
 success: False
       x: array([0., 0., 0., 0.])


## Count Outcomes

In [6]:
student = pd.read_csv("../data/student.csv")
student.head(6)

Unnamed: 0,id,gender,math,prog,daysabs
0,1001,0,63,2,4
1,1002,0,27,2,4
2,1003,1,20,2,2
3,1004,1,16,2,3
4,1005,1,2,2,3
5,1006,1,71,2,13


In [7]:
def LLpois(params):
    p = scipy.stats.poisson.ppf(student['daysabs'], params)
    LL = np.sum(np.log(p))
    return(-1*LL)
constraint = ({'type': 'eq', 'fun': lambda x: x[0]+(1-x[0])})
res4 = scipy.optimize.minimize(LLpois, x0 = 10, constraints=constraint)
print(res4)

     fun: nan
     jac: array([nan])
 message: 'Inequality constraints incompatible'
    nfev: 2
     nit: 1
    njev: 1
  status: 4
 success: False
       x: array([10.])


  LL = np.sum(np.log(p))


## Poisson Regression

In [8]:
def LLpois(params):
    b0, b1, b2, b3 = params
    X = b0 + b1*student['gender'] + b2*student['math'] + b3*student['prog']
    lam = np.exp(X)
    p = scipy.stats.poisson.ppf(student['daysabs'], lam)
    LL = np.sum(np.log(p))
    return(-1*LL)
constraint = ({'type': 'eq', 'fun': lambda x: x[0]+(1-x[0])})
res5 = scipy.optimize.minimize(LLpois, x0 = [np.log(5.96), 0, 0, 0], constraints=constraint)
print(res5)

     fun: nan
     jac: array([nan, nan, nan, nan])
 message: 'Singular matrix C in LSQ subproblem'
    nfev: 5
     nit: 1
    njev: 1
  status: 6
 success: False
       x: array([1.78507048, 0.        , 0.        , 0.        ])


  LL = np.sum(np.log(p))
