In [73]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, LabelEncoder, PowerTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [2]:
data = sm.datasets.get_rdataset("dietox", "geepack").data

In [3]:
data.head()

Unnamed: 0,Pig,Evit,Cu,Litter,Start,Weight,Feed,Time
0,4601,Evit000,Cu000,1,26.5,26.5,,1
1,4601,Evit000,Cu000,1,26.5,27.59999,5.200005,2
2,4601,Evit000,Cu000,1,26.5,36.5,17.6,3
3,4601,Evit000,Cu000,1,26.5,40.29999,28.5,4
4,4601,Evit000,Cu000,1,26.5,49.09998,45.200001,5


### Linear Mixed Model

In [46]:
df_X = data[['Weight','Time','Pig']]

In [47]:
md = smf.mixedlm("Weight ~ Time", df_X, groups=data["Pig"])

In [48]:
mdf = md.fit()
print(mdf.summary())

         Mixed Linear Model Regression Results
Model:            MixedLM Dependent Variable: Weight    
No. Observations: 861     Method:             REML      
No. Groups:       72      Scale:              11.3669   
Min. group size:  11      Log-Likelihood:     -2404.7753
Max. group size:  12      Converged:          Yes       
Mean group size:  12.0                                  
--------------------------------------------------------
             Coef.  Std.Err.    z    P>|z| [0.025 0.975]
--------------------------------------------------------
Intercept    15.724    0.788  19.952 0.000 14.179 17.268
Time          6.943    0.033 207.939 0.000  6.877  7.008
Group Var    40.394    2.149                            



In [49]:
df_X

Unnamed: 0,Weight,Time,Pig
0,26.50000,1,4601
1,27.59999,2,4601
2,36.50000,3,4601
3,40.29999,4,4601
4,49.09998,5,4601
...,...,...,...
856,73.19995,8,8442
857,81.69995,9,8442
858,90.29999,10,8442
859,96.00000,11,8442


In [50]:
predic = mdf.predict(df_X)

In [52]:
mean_squared_error(predic, df_X['Weight'])

51.26223595860064

### OLS

In [29]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
X = data[['Time','Pig']]
y = data['Weight']

In [62]:
clf = LinearRegression().fit(X,y)
ols_predict = clf.predict(X)

In [63]:
mean_squared_error(ols_predict, y)

51.25143564415134

### Ridge Regression

In [26]:
clf = Ridge(alpha=10000).fit(X,y)

In [27]:
ridge_predict = clf.predict(X)

In [28]:
mean_squared_error(ridge_predict, y)

191.35861348505742

### Lasso

In [45]:
clf = Lasso(alpha=0.001).fit(X,y)

In [46]:
lasso_predict = clf.predict(X)

In [47]:
mean_squared_error(lasso_predict, y)

51.251435728526545

### Elastic 

In [86]:
regr = ElasticNet(random_state=0, alpha=0.00001, l1_ratio=1).fit(X,y)

In [87]:
elastic_predict = regr.predict(X)

In [88]:
mean_squared_error(elastic_predict, y)

51.2514356441598