Import the modules we are going to use.

In [1]:
import statsmodels.api as sm
import pandas as pd
import numpy as np

Load our data. **Note that the `display()` function is not available in the default Python console. In a notebook, calling `display` on a variable is equivalent to typing the variable name and pressing "Enter" to have Python show its value.**

In [2]:
# Parse the text file with np.loadtxt's default settings; the preview below
# shows the result as a two-column float array.
X = np.loadtxt(fname='h95_ih_iy_adult_male.txt')

# Show only the first five rows rather than the whole array.
display(X[0:5, :])

array([[328., 370.],
       [305., 384.],
       [331., 411.],
       [353., 441.],
       [367., 441.]])

Extract the different vowel categories from our data set.

In [3]:
# Column 0 of X becomes the `iy` tokens (label 0) and column 1 becomes the
# `ih` tokens (label 1). Each result has one [measurement, label] row per
# row of X.
#
# The label vectors are sized from X itself rather than a hard-coded 45, so
# this cell keeps working when the input file has a different number of rows.
n_rows = X.shape[0]
iy = np.column_stack((X[:, 0], np.zeros(n_rows)))
ih = np.column_stack((X[:, 1], np.ones(n_rows)))

# Preview the first five rows of each category.
display(iy[:5, :])
display(ih[:5, :])

array([[328.,   0.],
       [305.,   0.],
       [331.,   0.],
       [353.,   0.],
       [367.,   0.]])

array([[370.,   1.],
       [384.,   1.],
       [411.,   1.],
       [441.,   1.],
       [441.,   1.]])

Stack the two matrices vertically (`iy` on top of `ih`) and then make a data frame.

In [4]:
# Join the two category arrays row-wise: all `iy` rows first, then all `ih`.
vowels = np.concatenate((iy, ih), axis=0)

# First array column holds the measurement, second holds the 0/1 label.
d = pd.DataFrame(
    {
        'f1': vowels[:, 0],
        'vowel': vowels[:, 1],
    }
)
display(d)

Unnamed: 0,f1,vowel
0,328.0,0.0
1,305.0,0.0
2,331.0,0.0
3,353.0,0.0
4,367.0,0.0
...,...,...
85,432.0,1.0
86,396.0,1.0
87,429.0,1.0
88,416.0,1.0


Add a column of ones (the constant term for the intercept) to the data frame.

In [5]:
# Prepend a `const` column of ones so the model fitted later can use it as
# its intercept term. The keyword values are statsmodels' defaults, written
# out explicitly; 'skip' leaves the frame unchanged if a constant column is
# already present, so re-running this cell is harmless.
d = sm.add_constant(d, prepend=True, has_constant='skip')
display(d)

Unnamed: 0,const,f1,vowel
0,1.0,328.0,0.0
1,1.0,305.0,0.0
2,1.0,331.0,0.0
3,1.0,353.0,0.0
4,1.0,367.0,0.0
...,...,...,...
85,1.0,432.0,1.0
86,1.0,396.0,1.0
87,1.0,429.0,1.0
88,1.0,416.0,1.0


Fit the logistic regression and display the results.

In [6]:
# Logistic regression of the 0/1 vowel label on the intercept column and f1.
# `.fit()` prints the optimizer's convergence report.
m = sm.Logit(
    endog=d['vowel'],
    exog=d[['const', 'f1']],
).fit()

# Last expression in the cell, so the notebook renders the summary tables.
m.summary()

Optimization terminated successfully.
         Current function value: 0.192784
         Iterations 9


0,1,2,3
Dep. Variable:,vowel,No. Observations:,90.0
Model:,Logit,Df Residuals:,88.0
Method:,MLE,Df Model:,1.0
Date:,"Thu, 12 May 2022",Pseudo R-squ.:,0.7219
Time:,14:13:43,Log-Likelihood:,-17.351
converged:,True,LL-Null:,-62.383
Covariance Type:,nonrobust,LLR p-value:,2.304e-21

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-35.0865,8.376,-4.189,0.000,-51.503,-18.670
f1,0.0914,0.022,4.164,0.000,0.048,0.134
