In [146]:
# https://github.com/azmatabedi/SCADI_obesity_levels_Dataset
# https://www.sciencedirect.com/science/article/pii/S2352340919306985
import pandas as pd
import scipy.stats as stats
import statsmodels.api as sm
import numpy as np
# Read the obesity-levels survey data (sources linked above) and preview the
# first rows to check that the file parsed as expected.
obesity = pd.read_csv("Obesity.csv")
obesity.head()

Unnamed: 0,Gender,Age,Height,Weight,family_history_with_overweight,FAVC,FCVC,NCP,CAEC,SMOKE,CH2O,SCC,FAF,TUE,CALC,MTRANS,NObeyesdad
0,Female,21.0,1.62,64.0,yes,no,2.0,3.0,Sometimes,no,2.0,no,0.0,1.0,no,Public_Transportation,Normal_Weight
1,Female,21.0,1.52,56.0,yes,no,3.0,3.0,Sometimes,yes,3.0,yes,3.0,0.0,Sometimes,Public_Transportation,Normal_Weight
2,Male,23.0,1.8,77.0,yes,no,2.0,3.0,Sometimes,no,2.0,no,2.0,1.0,Frequently,Public_Transportation,Normal_Weight
3,Male,27.0,1.8,87.0,no,no,3.0,3.0,Sometimes,no,2.0,no,2.0,0.0,Frequently,Walking,Overweight_Level_I
4,Male,22.0,1.78,89.8,no,no,2.0,1.0,Sometimes,no,2.0,no,0.0,0.0,Sometimes,Public_Transportation,Overweight_Level_II


# Some preprocessing operations

In [149]:
# Encode every categorical column as an integer so it can be used directly as
# a numeric term in the regression formulas.
CATEGORY_CODES = {
    "Gender": {"Female": 0, "Male": 1},
    "family_history_with_overweight": {"no": 0, "yes": 1},
    # Target collapsed to binary: 0 = insufficient/normal weight,
    # 1 = any overweight or obesity level.
    "NObeyesdad": {
        "Insufficient_Weight": 0,
        "Normal_Weight": 0,
        "Overweight_Level_I": 1,
        "Overweight_Level_II": 1,
        "Obesity_Type_I": 1,
        "Obesity_Type_II": 1,
        "Obesity_Type_III": 1,
    },
    "FAVC": {"no": 0, "yes": 1},
    # CAEC collapsed to binary: 'no' -> 0, any other frequency -> 1.
    "CAEC": {"no": 0, "Sometimes": 1, "Frequently": 1, "Always": 1},
    "SMOKE": {"no": 0, "yes": 1},
    "SCC": {"no": 0, "yes": 1},
    # CALC is kept as an ordinal 0-3 scale.
    "CALC": {"no": 0, "Sometimes": 1, "Frequently": 2, "Always": 3},
    # MTRANS collapsed to binary: Automobile/Motorbike -> 0,
    # Public_Transportation/Bike/Walking -> 1.
    "MTRANS": {
        "Automobile": 0,
        "Motorbike": 0,
        "Public_Transportation": 1,
        "Bike": 1,
        "Walking": 1,
    },
}

for column, codes in CATEGORY_CODES.items():
    # `codes.get(v, v)` passes already-encoded values through unchanged, so
    # re-running this cell is harmless. The explicit int64 cast replaces the
    # implicit object -> int downcast that `Series.replace` performed (that
    # downcast is deprecated in pandas 2.2) and fails loudly if a category
    # missing from the table is still present.
    obesity[column] = (
        obesity[column]
        .map(lambda value, codes=codes: codes.get(value, value))
        .astype("int64")
    )

obesity.head()

Unnamed: 0,Gender,Age,Height,Weight,family_history_with_overweight,FAVC,FCVC,NCP,CAEC,SMOKE,CH2O,SCC,FAF,TUE,CALC,MTRANS,NObeyesdad
0,0,21.0,1.62,64.0,1,0,2.0,3.0,1,0,2.0,0,0.0,1.0,0,1,0
1,0,21.0,1.52,56.0,1,0,3.0,3.0,1,1,3.0,1,3.0,0.0,1,1,0
2,1,23.0,1.8,77.0,1,0,2.0,3.0,1,0,2.0,0,2.0,1.0,2,1,0
3,1,27.0,1.8,87.0,0,0,3.0,3.0,1,0,2.0,0,2.0,0.0,2,1,1
4,1,22.0,1.78,89.8,0,0,2.0,1.0,1,0,2.0,0,0.0,0.0,1,1,1


In [151]:
# Check the column dtypes after the categorical-to-integer encoding.
obesity.dtypes

Gender                              int64
Age                               float64
Height                            float64
Weight                            float64
family_history_with_overweight      int64
FAVC                                int64
FCVC                              float64
NCP                               float64
CAEC                                int64
SMOKE                               int64
CH2O                              float64
SCC                                 int64
FAF                               float64
TUE                               float64
CALC                                int64
MTRANS                              int64
NObeyesdad                          int64
dtype: object

# Simple Logistic Regression
## dependent variable : NObeyesdad
### NObeyesdad ~ Gender

In [152]:
# Logistic regression (binomial GLM) of the binary target on Gender.
model = sm.GLM.from_formula("NObeyesdad ~ Gender", family=sm.families.Binomial(), data=obesity)
res = model.fit()

# Fitted coefficients are on the log-odds scale; exponentiate them and their
# confidence bounds to report odds ratios.
# Named `odds_table` (not `stats`) so the `scipy.stats` module imported as
# `stats` is not overwritten.
odds_table = pd.DataFrame(np.exp(res.params), columns=["OddsRatio"])
odds_table["P > |z|"] = res.pvalues
odds_table[["2.5%", "97.5%"]] = np.exp(res.conf_int())
odds_table

Unnamed: 0,OddsRatio,P > |z|,2.5%,97.5%
Intercept,2.321656,9.848018999999999e-36,2.033951,2.650057
Gender,1.446891,0.0001979639,1.191086,1.757635


log-odds = ln(2.321656) + ln(1.446891)·x  (the table lists exponentiated coefficients, so odds = 2.321656 × 1.446891^x)  
p_value = 1.979639e-04 < 0.05, so we reject the null hypothesis.  
95% CI for OR = (1.191086, 1.757635)  
The CI of the OR does not contain 1, which also leads to the rejection of the null hypothesis.  
So 'Gender' has a statistically significant effect on 'NObeyesdad'. 
### NObeyesdad ~ Age

In [153]:
# Logistic regression (binomial GLM) of the binary target on Age.
model = sm.GLM.from_formula("NObeyesdad ~ Age", family=sm.families.Binomial(), data=obesity)
res = model.fit()

# Fitted coefficients are on the log-odds scale; exponentiate them and their
# confidence bounds to report odds ratios.
# Named `odds_table` (not `stats`) so the `scipy.stats` module imported as
# `stats` is not overwritten.
odds_table = pd.DataFrame(np.exp(res.params), columns=["OddsRatio"])
odds_table["P > |z|"] = res.pvalues
odds_table[["2.5%", "97.5%"]] = np.exp(res.conf_int())
odds_table

Unnamed: 0,OddsRatio,P > |z|,2.5%,97.5%
Intercept,0.01832,3.456322e-32,0.009433,0.035579
Age,1.248901,4.777634e-46,1.211288,1.287683


log-odds = ln(0.018320) + ln(1.248901)·x  (the table lists exponentiated coefficients, so odds = 0.018320 × 1.248901^x)  
p_value = 4.777634e-46 < 0.05, so we reject the null hypothesis.  
95% CI for OR = (1.211288, 1.287683)  
The CI of the OR does not contain 1, which also leads to the rejection of the null hypothesis.  
So 'Age' has a statistically significant effect on 'NObeyesdad'. 
### NObeyesdad ~ Height

In [154]:
# Logistic regression (binomial GLM) of the binary target on Height.
model = sm.GLM.from_formula("NObeyesdad ~ Height", family=sm.families.Binomial(), data=obesity)
res = model.fit()

# Fitted coefficients are on the log-odds scale; exponentiate them and their
# confidence bounds to report odds ratios.
# Named `odds_table` (not `stats`) so the `scipy.stats` module imported as
# `stats` is not overwritten.
odds_table = pd.DataFrame(np.exp(res.params), columns=["OddsRatio"])
odds_table["P > |z|"] = res.pvalues
odds_table[["2.5%", "97.5%"]] = np.exp(res.conf_int())
odds_table

Unnamed: 0,OddsRatio,P > |z|,2.5%,97.5%
Intercept,0.022106,2.871233e-05,0.003706,0.131861
Height,17.285001,1.194972e-07,6.018354,49.643355


log-odds = ln(0.022106) + ln(17.285001)·x  (the table lists exponentiated coefficients, so odds = 0.022106 × 17.285001^x)  
p_value = 1.194972e-07 < 0.05, so we reject the null hypothesis.  
95% CI for OR = (6.018354, 49.643355)  
The CI of the OR does not contain 1, which also leads to the rejection of the null hypothesis.  
So 'Height' has a statistically significant effect on 'NObeyesdad'. 
### NObeyesdad ~ Weight

In [155]:
# Logistic regression (binomial GLM) of the binary target on Weight.
model = sm.GLM.from_formula("NObeyesdad ~ Weight", family=sm.families.Binomial(), data=obesity)
res = model.fit()

# Fitted coefficients are on the log-odds scale; exponentiate them and their
# confidence bounds to report odds ratios.
# Named `odds_table` (not `stats`) so the `scipy.stats` module imported as
# `stats` is not overwritten.
odds_table = pd.DataFrame(np.exp(res.params), columns=["OddsRatio"])
odds_table["P > |z|"] = res.pvalues
odds_table[["2.5%", "97.5%"]] = np.exp(res.conf_int())
odds_table

Unnamed: 0,OddsRatio,P > |z|,2.5%,97.5%
Intercept,4.821397e-07,5.980049e-79,1.059389e-07,2e-06
Weight,1.243288,1.282348e-82,1.216033,1.271154


log-odds = ln(4.821397e-07) + ln(1.243288)·x  (the table lists exponentiated coefficients, so odds = 4.821397e-07 × 1.243288^x)  
p_value = 1.282348e-82 < 0.05, so we reject the null hypothesis.  
95% CI for OR = (1.216033, 1.271154)  
The CI of the OR does not contain 1, which also leads to the rejection of the null hypothesis.  
So 'Weight' has a statistically significant effect on 'NObeyesdad'. 

### NObeyesdad ~ family_history_with_overweight

In [156]:
# Logistic regression (binomial GLM) of the binary target on
# family_history_with_overweight.
model = sm.GLM.from_formula("NObeyesdad ~ family_history_with_overweight", family=sm.families.Binomial(), data=obesity)
res = model.fit()

# Fitted coefficients are on the log-odds scale; exponentiate them and their
# confidence bounds to report odds ratios.
# Named `odds_table` (not `stats`) so the `scipy.stats` module imported as
# `stats` is not overwritten.
odds_table = pd.DataFrame(np.exp(res.params), columns=["OddsRatio"])
odds_table["P > |z|"] = res.pvalues
odds_table[["2.5%", "97.5%"]] = np.exp(res.conf_int())
odds_table

Unnamed: 0,OddsRatio,P > |z|,2.5%,97.5%
Intercept,0.384892,4.7577250000000003e-17,0.307964,0.481036
family_history_with_overweight,13.360495,5.422915999999999e-87,10.332587,17.275714


log-odds = ln(0.384892) + ln(13.360495)·x  (the table lists exponentiated coefficients, so odds = 0.384892 × 13.360495^x)  
p_value = 5.422916e-87 < 0.05, so we reject the null hypothesis.  
95% CI for OR = (10.332587, 17.275714)  
The CI of the OR does not contain 1, which also leads to the rejection of the null hypothesis.  
So 'family_history_with_overweight' has a statistically significant effect on 'NObeyesdad'.  

### NObeyesdad ~ FAVC

In [157]:
# Logistic regression (binomial GLM) of the binary target on FAVC.
model = sm.GLM.from_formula("NObeyesdad ~ FAVC", family=sm.families.Binomial(), data=obesity)
res = model.fit()

# Fitted coefficients are on the log-odds scale; exponentiate them and their
# confidence bounds to report odds ratios.
# Named `odds_table` (not `stats`) so the `scipy.stats` module imported as
# `stats` is not overwritten.
odds_table = pd.DataFrame(np.exp(res.params), columns=["OddsRatio"])
odds_table["P > |z|"] = res.pvalues
odds_table[["2.5%", "97.5%"]] = np.exp(res.conf_int())
odds_table

Unnamed: 0,OddsRatio,P > |z|,2.5%,97.5%
Intercept,0.884615,0.3382065,0.688315,1.136898
FAVC,3.786561,1.22803e-21,2.881648,4.975641


log-odds = ln(0.884615) + ln(3.786561)·x  (the table lists exponentiated coefficients, so odds = 0.884615 × 3.786561^x)  
p_value = 1.228030e-21 < 0.05, so we reject the null hypothesis.  
95% CI for OR = (2.881648, 4.975641)  
The CI of the OR does not contain 1, which also leads to the rejection of the null hypothesis.  
So 'FAVC' has a statistically significant effect on 'NObeyesdad'.  

### NObeyesdad ~ FCVC

In [158]:
# Logistic regression (binomial GLM) of the binary target on FCVC.
model = sm.GLM.from_formula("NObeyesdad ~ FCVC", family=sm.families.Binomial(), data=obesity)
res = model.fit()

# Fitted coefficients are on the log-odds scale; exponentiate them and their
# confidence bounds to report odds ratios.
# Named `odds_table` (not `stats`) so the `scipy.stats` module imported as
# `stats` is not overwritten.
odds_table = pd.DataFrame(np.exp(res.params), columns=["OddsRatio"])
odds_table["P > |z|"] = res.pvalues
odds_table[["2.5%", "97.5%"]] = np.exp(res.conf_int())
odds_table

Unnamed: 0,OddsRatio,P > |z|,2.5%,97.5%
Intercept,2.381184,0.000137,1.524641,3.718933
FCVC,1.065654,0.490019,0.889618,1.276523


log-odds = ln(2.381184) + ln(1.065654)·x  (the table lists exponentiated coefficients, so odds = 2.381184 × 1.065654^x)  
p_value = 0.490019 > 0.05, so we fail to reject the null hypothesis.  
95% CI for OR = (0.889618, 1.276523)  
The CI of the OR contains 1, which is consistent with failing to reject the null hypothesis.  
So 'FCVC' has no statistically significant effect on 'NObeyesdad'.  


### NObeyesdad ~ NCP

In [159]:
# Logistic regression (binomial GLM) of the binary target on NCP.
model = sm.GLM.from_formula("NObeyesdad ~ NCP", family=sm.families.Binomial(), data=obesity)
res = model.fit()

# Fitted coefficients are on the log-odds scale; exponentiate them and their
# confidence bounds to report odds ratios.
# Named `odds_table` (not `stats`) so the `scipy.stats` module imported as
# `stats` is not overwritten.
odds_table = pd.DataFrame(np.exp(res.params), columns=["OddsRatio"])
odds_table["P > |z|"] = res.pvalues
odds_table[["2.5%", "97.5%"]] = np.exp(res.conf_int())
odds_table

Unnamed: 0,OddsRatio,P > |z|,2.5%,97.5%
Intercept,7.012123,2.205467e-22,4.736942,10.380086
NCP,0.712632,1.14774e-06,0.621703,0.816861


log-odds = ln(7.012123) + ln(0.712632)·x  (the table lists exponentiated coefficients, so odds = 7.012123 × 0.712632^x)  
p_value = 1.147740e-06 < 0.05, so we reject the null hypothesis.  
95% CI for OR = (0.621703, 0.816861)  
The CI of the OR does not contain 1, which also leads to the rejection of the null hypothesis.  
So 'NCP' has a statistically significant effect on 'NObeyesdad'.  

### NObeyesdad ~ CAEC

In [160]:
# Logistic regression (binomial GLM) of the binary target on CAEC.
model = sm.GLM.from_formula("NObeyesdad ~ CAEC", family=sm.families.Binomial(), data=obesity)
res = model.fit()

# Fitted coefficients are on the log-odds scale; exponentiate them and their
# confidence bounds to report odds ratios.
# Named `odds_table` (not `stats`) so the `scipy.stats` module imported as
# `stats` is not overwritten.
odds_table = pd.DataFrame(np.exp(res.params), columns=["OddsRatio"])
odds_table["P > |z|"] = res.pvalues
odds_table[["2.5%", "97.5%"]] = np.exp(res.conf_int())
odds_table

Unnamed: 0,OddsRatio,P > |z|,2.5%,97.5%
Intercept,2.923077,0.000843,1.557193,5.487037
CAEC,0.948622,0.871139,0.50155,1.794204


log-odds = ln(2.923077) + ln(0.948622)·x  (the table lists exponentiated coefficients, so odds = 2.923077 × 0.948622^x)  
p_value = 0.871139 > 0.05, so we fail to reject the null hypothesis.  
95% CI for OR = (0.501550, 1.794204)  
The CI of the OR contains 1, which is consistent with failing to reject the null hypothesis.  
So 'CAEC' has no statistically significant effect on 'NObeyesdad'.  

### NObeyesdad ~ SMOKE

In [161]:
# Logistic regression (binomial GLM) of the binary target on SMOKE.
model = sm.GLM.from_formula("NObeyesdad ~ SMOKE", family=sm.families.Binomial(), data=obesity)
res = model.fit()

# Fitted coefficients are on the log-odds scale; exponentiate them and their
# confidence bounds to report odds ratios.
# Named `odds_table` (not `stats`) so the `scipy.stats` module imported as
# `stats` is not overwritten.
odds_table = pd.DataFrame(np.exp(res.params), columns=["OddsRatio"])
odds_table["P > |z|"] = res.pvalues
odds_table[["2.5%", "97.5%"]] = np.exp(res.conf_int())
odds_table

Unnamed: 0,OddsRatio,P > |z|,2.5%,97.5%
Intercept,2.792661,4.761517e-94,2.53237,3.079705
SMOKE,0.767317,0.4186727,0.40384,1.457942


log-odds = ln(2.792661) + ln(0.767317)·x  (the table lists exponentiated coefficients, so odds = 2.792661 × 0.767317^x)  
p_value = 4.186727e-01 > 0.05, so we fail to reject the null hypothesis.  
95% CI for OR = (0.40384, 1.457942)  
The CI of the OR contains 1, which is consistent with failing to reject the null hypothesis.  
So 'SMOKE' has no statistically significant effect on 'NObeyesdad'.  


### NObeyesdad ~ CH2O

In [162]:
# Logistic regression (binomial GLM) of the binary target on CH2O.
model = sm.GLM.from_formula("NObeyesdad ~ CH2O", family=sm.families.Binomial(), data=obesity)
res = model.fit()

# Fitted coefficients are on the log-odds scale; exponentiate them and their
# confidence bounds to report odds ratios.
# Named `odds_table` (not `stats`) so the `scipy.stats` module imported as
# `stats` is not overwritten.
odds_table = pd.DataFrame(np.exp(res.params), columns=["OddsRatio"])
odds_table["P > |z|"] = res.pvalues
odds_table[["2.5%", "97.5%"]] = np.exp(res.conf_int())
odds_table

Unnamed: 0,OddsRatio,P > |z|,2.5%,97.5%
Intercept,0.96295,0.8188883,0.697034,1.33031
CH2O,1.715973,4.705753e-11,1.461023,2.015412


log-odds = ln(0.962950) + ln(1.715973)·x  (the table lists exponentiated coefficients, so odds = 0.962950 × 1.715973^x)  
p_value = 4.705753e-11 < 0.05, so we reject the null hypothesis.  
95% CI for OR = (1.461023, 2.015412)  
The CI of the OR does not contain 1, which also leads to the rejection of the null hypothesis.  
So 'CH2O' has a statistically significant effect on 'NObeyesdad'.  

### NObeyesdad ~ SCC

In [163]:
# Logistic regression (binomial GLM) of the binary target on SCC.
model = sm.GLM.from_formula("NObeyesdad ~ SCC", family=sm.families.Binomial(), data=obesity)
res = model.fit()

# Fitted coefficients are on the log-odds scale; exponentiate them and their
# confidence bounds to report odds ratios.
# Named `odds_table` (not `stats`) so the `scipy.stats` module imported as
# `stats` is not overwritten.
odds_table = pd.DataFrame(np.exp(res.params), columns=["OddsRatio"])
odds_table["P > |z|"] = res.pvalues
odds_table[["2.5%", "97.5%"]] = np.exp(res.conf_int())
odds_table

Unnamed: 0,OddsRatio,P > |z|,2.5%,97.5%
Intercept,2.974359,4.763781e-100,2.689646,3.289211
SCC,0.284483,2.6345e-09,0.188064,0.430334


log-odds = ln(2.974359) + ln(0.284483)·x  (the table lists exponentiated coefficients, so odds = 2.974359 × 0.284483^x)  
p_value = 2.634500e-09 < 0.05, so we reject the null hypothesis.  
95% CI for OR = (0.188064, 0.430334)  
The CI of the OR does not contain 1, which also leads to the rejection of the null hypothesis.  
So 'SCC' has a statistically significant effect on 'NObeyesdad'.  

### NObeyesdad ~ FAF

In [164]:
# Logistic regression (binomial GLM) of the binary target on FAF.
model = sm.GLM.from_formula("NObeyesdad ~ FAF", family=sm.families.Binomial(), data=obesity)
res = model.fit()

# Fitted coefficients are on the log-odds scale; exponentiate them and their
# confidence bounds to report odds ratios.
# Named `odds_table` (not `stats`) so the `scipy.stats` module imported as
# `stats` is not overwritten.
odds_table = pd.DataFrame(np.exp(res.params), columns=["OddsRatio"])
odds_table["P > |z|"] = res.pvalues
odds_table[["2.5%", "97.5%"]] = np.exp(res.conf_int())
odds_table

Unnamed: 0,OddsRatio,P > |z|,2.5%,97.5%
Intercept,4.475611,2.2624039999999999e-72,3.80146,5.269317
FAF,0.643306,2.490373e-14,0.574322,0.720576


log-odds = ln(4.475611) + ln(0.643306)·x  (the table lists exponentiated coefficients, so odds = 4.475611 × 0.643306^x)  
p_value = 2.490373e-14 < 0.05, so we reject the null hypothesis.  
95% CI for OR = (0.574322, 0.720576)  
The CI of the OR does not contain 1, which also leads to the rejection of the null hypothesis.  
So 'FAF' has a statistically significant effect on 'NObeyesdad'.  

### NObeyesdad ~ TUE

In [165]:
# Logistic regression (binomial GLM) of the binary target on TUE.
model = sm.GLM.from_formula("NObeyesdad ~ TUE", family=sm.families.Binomial(), data=obesity)
res = model.fit()

# Fitted coefficients are on the log-odds scale; exponentiate them and their
# confidence bounds to report odds ratios.
# Named `odds_table` (not `stats`) so the `scipy.stats` module imported as
# `stats` is not overwritten.
odds_table = pd.DataFrame(np.exp(res.params), columns=["OddsRatio"])
odds_table["P > |z|"] = res.pvalues
odds_table[["2.5%", "97.5%"]] = np.exp(res.conf_int())
odds_table

Unnamed: 0,OddsRatio,P > |z|,2.5%,97.5%
Intercept,3.533419,1.371834e-62,3.04682,4.097732
TUE,0.70428,1.071588e-05,0.602502,0.823252


log-odds = ln(3.533419) + ln(0.704280)·x  (the table lists exponentiated coefficients, so odds = 3.533419 × 0.704280^x)  
p_value = 1.071588e-05 < 0.05, so we reject the null hypothesis.  
95% CI for OR = (0.602502, 0.823252)  
The CI of the OR does not contain 1, which also leads to the rejection of the null hypothesis.  
So 'TUE' has a statistically significant effect on 'NObeyesdad'.  

### NObeyesdad ~ CALC

In [166]:
# Logistic regression (binomial GLM) of the binary target on CALC.
model = sm.GLM.from_formula("NObeyesdad ~ CALC", family=sm.families.Binomial(), data=obesity)
res = model.fit()

# Fitted coefficients are on the log-odds scale; exponentiate them and their
# confidence bounds to report odds ratios.
# Named `odds_table` (not `stats`) so the `scipy.stats` module imported as
# `stats` is not overwritten.
odds_table = pd.DataFrame(np.exp(res.params), columns=["OddsRatio"])
odds_table["P > |z|"] = res.pvalues
odds_table[["2.5%", "97.5%"]] = np.exp(res.conf_int())
odds_table

Unnamed: 0,OddsRatio,P > |z|,2.5%,97.5%
Intercept,1.97885,4.928373e-17,1.687157,2.320974
CALC,1.620223,4.763473e-07,1.342777,1.954995


log-odds = ln(1.978850) + ln(1.620223)·x  (the table lists exponentiated coefficients, so odds = 1.978850 × 1.620223^x)  
p_value = 4.763473e-07 < 0.05, so we reject the null hypothesis.  
95% CI for OR = (1.342777, 1.954995)  
The CI of the OR does not contain 1, which also leads to the rejection of the null hypothesis.  
So 'CALC' has a statistically significant effect on 'NObeyesdad'.  

### NObeyesdad ~ MTRANS

In [167]:
# Logistic regression (binomial GLM) of the binary target on MTRANS.
model = sm.GLM.from_formula("NObeyesdad ~ MTRANS", family=sm.families.Binomial(), data=obesity)
res = model.fit()

# Fitted coefficients are on the log-odds scale; exponentiate them and their
# confidence bounds to report odds ratios.
# Named `odds_table` (not `stats`) so the `scipy.stats` module imported as
# `stats` is not overwritten.
odds_table = pd.DataFrame(np.exp(res.params), columns=["OddsRatio"])
odds_table["P > |z|"] = res.pvalues
odds_table[["2.5%", "97.5%"]] = np.exp(res.conf_int())
odds_table

Unnamed: 0,OddsRatio,P > |z|,2.5%,97.5%
Intercept,3.824742,6.016163e-32,3.058671,4.782683
MTRANS,0.668353,0.001452903,0.521534,0.856503


log-odds = ln(3.824742) + ln(0.668353)·x  (the table lists exponentiated coefficients, so odds = 3.824742 × 0.668353^x)  
p_value = 1.452903e-03 < 0.05, so we reject the null hypothesis.  
95% CI for OR = (0.521534, 0.856503)  
The CI of the OR does not contain 1, which also leads to the rejection of the null hypothesis.  
So 'MTRANS' has a statistically significant effect on 'NObeyesdad'. 

In [168]:
# Gender Age Height Weight family_history_with_overweight FAVC FCVC NCP CAEC SMOKE CH2O
# SCC FAF TUE CALC MTRANS NObeyesdad

# Multiple Logistic Regression


In [169]:
# Show the first rows of the encoded frame again as a reminder of the columns
# available as predictors.
obesity.head()

Unnamed: 0,Gender,Age,Height,Weight,family_history_with_overweight,FAVC,FCVC,NCP,CAEC,SMOKE,CH2O,SCC,FAF,TUE,CALC,MTRANS,NObeyesdad
0,0,21.0,1.62,64.0,1,0,2.0,3.0,1,0,2.0,0,0.0,1.0,0,1,0
1,0,21.0,1.52,56.0,1,0,3.0,3.0,1,1,3.0,1,3.0,0.0,1,1,0
2,1,23.0,1.8,77.0,1,0,2.0,3.0,1,0,2.0,0,2.0,1.0,2,1,0
3,1,27.0,1.8,87.0,0,0,3.0,3.0,1,0,2.0,0,2.0,0.0,2,1,1
4,1,22.0,1.78,89.8,0,0,2.0,1.0,1,0,2.0,0,0.0,0.0,1,1,1


## Group 1

In [170]:
# Group 1: logistic regression (binomial GLM) of the binary target on
# family_history_with_overweight, FAVC, FCVC and NCP jointly.
model = sm.GLM.from_formula(
    "NObeyesdad ~ family_history_with_overweight + FAVC + FCVC + NCP",
    family=sm.families.Binomial(),
    data=obesity,
)
# `res` is kept under this name because the next cell reads `res.aic`.
res = model.fit()

# Fitted coefficients are on the log-odds scale; exponentiate them and their
# confidence bounds to report odds ratios.
# Named `odds_table` (not `stats`) so the `scipy.stats` module imported as
# `stats` is not overwritten.
odds_table = pd.DataFrame(np.exp(res.params), columns=["OddsRatio"])
odds_table["P > |z|"] = res.pvalues
odds_table[["2.5%", "97.5%"]] = np.exp(res.conf_int())
odds_table

Unnamed: 0,OddsRatio,P > |z|,2.5%,97.5%
Intercept,0.729065,0.3946232,0.352206,1.509162
family_history_with_overweight,14.857184,2.064571e-78,11.204951,19.699856
FAVC,2.634636,6.646928e-09,1.899062,3.655124
FCVC,1.007577,0.9446951,0.814058,1.247099
NCP,0.560099,1.437e-12,0.477068,0.65758


In [171]:
# AIC of the Group 1 model fitted in the previous cell; the groups are compared
# on this value below (lower is preferred).
res.aic

1908.4540283090214

Features: "family_history_with_overweight + FAVC + FCVC + NCP" has the AIC: 1908.4540283090214.  
The p-value of FCVC is bigger than 0.05.  


## Group 2

In [172]:
# Group 2: logistic regression (binomial GLM) of the binary target on FAF, TUE,
# CALC and MTRANS jointly.
model = sm.GLM.from_formula(
    "NObeyesdad ~ FAF + TUE + CALC + MTRANS",
    family=sm.families.Binomial(),
    data=obesity,
)
# `res` is kept under this name because the next cell reads `res.aic`.
res = model.fit()

# Fitted coefficients are on the log-odds scale; exponentiate them and their
# confidence bounds to report odds ratios.
# Named `odds_table` (not `stats`) so the `scipy.stats` module imported as
# `stats` is not overwritten.
odds_table = pd.DataFrame(np.exp(res.params), columns=["OddsRatio"])
odds_table["P > |z|"] = res.pvalues
odds_table[["2.5%", "97.5%"]] = np.exp(res.conf_int())
odds_table

Unnamed: 0,OddsRatio,P > |z|,2.5%,97.5%
Intercept,5.218283,9.481765e-26,3.832474,7.105195
FAF,0.660827,1.756655e-12,0.588963,0.741458
TUE,0.764564,0.001190281,0.65,0.899319
CALC,1.515037,1.890182e-05,1.252427,1.832712
MTRANS,0.693458,0.005261113,0.536242,0.896766


In [173]:
# AIC of the Group 2 model fitted in the previous cell; the groups are compared
# on this value below (lower is preferred).
res.aic

2350.1018011681927

Features: "FAF + TUE + CALC + MTRANS" has the AIC: 2350.1018011681927.  
None of the variables has a p-value bigger than 0.05.  

## Group 3

In [174]:
# Group 3: logistic regression (binomial GLM) of the binary target on CAEC,
# SMOKE, CH2O and SCC jointly.
model = sm.GLM.from_formula(
    "NObeyesdad ~ CAEC + SMOKE + CH2O + SCC",
    family=sm.families.Binomial(),
    data=obesity,
)
# `res` is kept under this name because the next cell reads `res.aic`.
res = model.fit()

# Fitted coefficients are on the log-odds scale; exponentiate them and their
# confidence bounds to report odds ratios.
# Named `odds_table` (not `stats`) so the `scipy.stats` module imported as
# `stats` is not overwritten.
odds_table = pd.DataFrame(np.exp(res.params), columns=["OddsRatio"])
odds_table["P > |z|"] = res.pvalues
odds_table[["2.5%", "97.5%"]] = np.exp(res.conf_int())
odds_table

Unnamed: 0,OddsRatio,P > |z|,2.5%,97.5%
Intercept,0.85863,0.7020824,0.393218,1.874904
CAEC,1.169489,0.6482671,0.596826,2.29163
SMOKE,0.903037,0.7608992,0.468177,1.74181
CH2O,1.746621,2.812932e-11,1.482108,2.058341
SCC,0.274686,2.161482e-09,0.179917,0.419373


In [175]:
# AIC of the Group 3 model fitted in the previous cell; the groups are compared
# on this value below (lower is preferred).
res.aic

2369.810217326713

Features: "CAEC + SMOKE + CH2O + SCC" has the AIC: 2369.810217326713.  
The p-values of CAEC and SMOKE are bigger than 0.05.  


## Group 4

In [176]:
# Group 4: logistic regression (binomial GLM) of the binary target on Gender,
# Age, Height and Weight jointly.
model = sm.GLM.from_formula(
    "NObeyesdad ~ Gender + Age + Height + Weight",
    family=sm.families.Binomial(),
    data=obesity,
)
# `res` is kept under this name because the next cell reads `res.aic`.
res = model.fit()

# Fitted coefficients are on the log-odds scale; exponentiate them and their
# confidence bounds to report odds ratios.
# Named `odds_table` (not `stats`) so the `scipy.stats` module imported as
# `stats` is not overwritten.
odds_table = pd.DataFrame(np.exp(res.params), columns=["OddsRatio"])
odds_table["P > |z|"] = res.pvalues
odds_table[["2.5%", "97.5%"]] = np.exp(res.conf_int())
odds_table

Unnamed: 0,OddsRatio,P > |z|,2.5%,97.5%
Intercept,3.0714209999999998e+66,1.677692e-10,1.22628e+46,7.69288e+86
Gender,0.03969162,0.006306886,0.003918906,0.4020063
Age,1.005552,0.9070259,0.9163236,1.10347
Height,1.482838e-80,3.975043e-11,3.038969e-104,7.235373e-57
Weight,9.365225,2.180076e-11,4.864508,18.03008


In [177]:
# AIC of the Group 4 model fitted in the previous cell; the groups are compared
# on this value below (lower is preferred).
res.aic

75.43578337705432

Features: "Gender + Age + Height + Weight" has the AIC: 75.43578337705432.  
The p-value of Age is bigger than 0.05.  

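The features in Group 4 are the best choice by this criterion, because that model has the lowest AIC value. Note, however, that its odds ratios for the intercept and Height are extreme (about 3e+66 and 1e-80), which suggests near-perfect separation — presumably because Height and Weight together almost determine the weight class — so the individual coefficients of this model should be interpreted with caution.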