In [1]:
import numpy as np
import pandas as pd
from matplotlib.pyplot import subplots

import statsmodels.api as sm

from statsmodels.stats.outliers_influence import variance_inflation_factor as VIF
from statsmodels.stats.anova import anova_lm

from ISLP import load_data
from ISLP.models import (ModelSpec as MS,
                         summarize,
                         poly)

# Q1-Q3

In [2]:
# Load the `Auto` data set through ISLP's load_data helper and list its columns.
Auto = load_data('Auto')
Auto.columns


Index(['mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
       'acceleration', 'year', 'origin', 'name'],
      dtype='object')

In [3]:
# Print the (rows, columns) shape, then show the frame itself as the cell output.
print(Auto.shape)
Auto

(392, 9)


Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,year,origin,name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
387,27.0,4,140.0,86,2790,15.6,82,1,ford mustang gl
388,44.0,4,97.0,52,2130,24.6,82,2,vw pickup
389,32.0,4,135.0,84,2295,11.6,82,1,dodge rampage
390,28.0,4,120.0,79,2625,18.6,82,1,ford ranger


In [4]:
# Summary statistics, restricted to the first four columns of the describe() table.
Auto.describe().iloc[:,:4]

Unnamed: 0,mpg,cylinders,displacement,horsepower
count,392.0,392.0,392.0,392.0
mean,23.445918,5.471939,194.41199,104.469388
std,7.805007,1.705783,104.644004,38.49116
min,9.0,3.0,68.0,46.0
25%,17.0,4.0,105.0,75.0
50%,22.75,4.0,151.0,93.5
75%,29.0,8.0,275.75,126.0
max,46.6,8.0,455.0,230.0


In [5]:
# Design matrix for the simple regression of mpg on displacement.
# ModelSpec adds the `intercept` column of ones itself, so no hand-built
# DataFrame is needed alongside it.
X = MS(['displacement']).fit_transform(Auto)

# Preview the first four rows of the design matrix.
X[:4]


Unnamed: 0,intercept,displacement
0,1.0,307.0
1,1.0,350.0
2,1.0,318.0
3,1.0,304.0


In [6]:
# Ordinary least squares: response is the mpg column, regressors are the
# columns of the design matrix X built above.
y = Auto.loc[:, 'mpg']
model = sm.OLS(endog=y, exog=X)
results = model.fit()

In [7]:
# Full regression report for the fit stored in `results`.
results.summary()

0,1,2,3
Dep. Variable:,mpg,R-squared:,0.648
Model:,OLS,Adj. R-squared:,0.647
Method:,Least Squares,F-statistic:,718.7
Date:,"Sat, 16 Sep 2023",Prob (F-statistic):,1.66e-90
Time:,22:49:53,Log-Likelihood:,-1156.4
No. Observations:,392,AIC:,2317.0
Df Residuals:,390,BIC:,2325.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
intercept,35.1206,0.494,71.033,0.000,34.149,36.093
displacement,-0.0601,0.002,-26.808,0.000,-0.064,-0.056

0,1,2,3
Omnibus:,41.308,Durbin-Watson:,0.926
Prob(Omnibus):,0.0,Jarque-Bera (JB):,61.139
Skew:,0.709,Prob(JB):,5.3e-14
Kurtosis:,4.317,Cond. No.,466.0


In [8]:
# Q1: estimated coefficient on displacement in `results`, to two decimals.

round(results.params.loc['displacement'], 2)

-0.06

In [9]:
# Q2: R-squared of the fit stored in `results`, to two decimals.

round(results.rsquared, ndigits=2)

0.65

In [10]:
# Predictors: every column of Auto except the response (mpg) and the car name.
terms = Auto.drop(columns=['name', 'mpg']).columns
print(terms)

# Rebuild the design matrix (intercept included) from those predictors.
X = MS(terms).fit_transform(Auto)

X.iloc[:4]

Index(['cylinders', 'displacement', 'horsepower', 'weight', 'acceleration',
       'year', 'origin'],
      dtype='object')


Unnamed: 0,intercept,cylinders,displacement,horsepower,weight,acceleration,year,origin
0,1.0,8,307.0,130,3504,12.0,70,1
1,1.0,8,350.0,165,3693,11.5,70,1
2,1.0,8,318.0,150,3436,11.0,70,1
3,1.0,8,304.0,150,3433,12.0,70,1


In [11]:
# Second OLS fit: same response y, regressed on the current design matrix X.
model2 = sm.OLS(endog=y, exog=X)
results2 = model2.fit()

In [12]:
# Full regression report for the fit stored in `results2`.
results2.summary()

0,1,2,3
Dep. Variable:,mpg,R-squared:,0.821
Model:,OLS,Adj. R-squared:,0.818
Method:,Least Squares,F-statistic:,252.4
Date:,"Sat, 16 Sep 2023",Prob (F-statistic):,2.04e-139
Time:,22:49:53,Log-Likelihood:,-1023.5
No. Observations:,392,AIC:,2063.0
Df Residuals:,384,BIC:,2095.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
intercept,-17.2184,4.644,-3.707,0.000,-26.350,-8.087
cylinders,-0.4934,0.323,-1.526,0.128,-1.129,0.142
displacement,0.0199,0.008,2.647,0.008,0.005,0.035
horsepower,-0.0170,0.014,-1.230,0.220,-0.044,0.010
weight,-0.0065,0.001,-9.929,0.000,-0.008,-0.005
acceleration,0.0806,0.099,0.815,0.415,-0.114,0.275
year,0.7508,0.051,14.729,0.000,0.651,0.851
origin,1.4261,0.278,5.127,0.000,0.879,1.973

0,1,2,3
Omnibus:,31.906,Durbin-Watson:,1.309
Prob(Omnibus):,0.0,Jarque-Bera (JB):,53.1
Skew:,0.529,Prob(JB):,2.95e-12
Kurtosis:,4.46,Cond. No.,85900.0


In [13]:
# Q3: estimated coefficient on displacement in `results2`, to two decimals.
round(results2.params.loc['displacement'], 2)

0.02

# Q4-Q21

In [14]:
# Six coefficients stored as a (6, 1) column vector, ready to be
# right-multiplied by a (1, 6) row of feature values.
beta = np.array([50, 20, .07, 35, .01, -10]).reshape(-1, 1)



In [15]:
#Q4: prediction for a single observation

# Base feature values; X0 = 1 pairs with the first entry of beta.
X0, X1, X2, X3 = 1, 4, 100, 1
# Product terms built from X1.
X4 = X1 * X2
X5 = X1 * X3

# One row of six feature values times the coefficient column gives a 1x1 result.
X = np.array([X0, X1, X2, X3, X4, X5]).reshape(1, -1)
y = X @ beta

print(y)

[[136.]]
