In [1]:
import os
from typing import Dict, NamedTuple, Union

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm

In [2]:
class Config(NamedTuple):
    """Paths and output options used throughout the notebook.

    The field order is part of the tuple interface and must stay as is.
    """

    # CSV inputs; each is loaded with ``pd.read_csv(..., index_col=0)``.
    data_1: str = "../pre_vertical.csv"
    data_2: str = "../2way_anova_portrait.csv"
    # Output directory, created up front with ``os.makedirs``.
    # (The field name is misspelled but kept, because callers reference it.)
    dirctory: str = "../images/dataviz"
    # Flag for writing output files; its consumer is not visible in this part
    # of the notebook.
    savefile: bool = False
    # Presumably keyword arguments forwarded to matplotlib's ``savefig``
    # (TODO confirm against the plotting cells).
    # NOTE: this default dict is a single object shared by every instance.
    kwarg_savefig: Dict[str, Union[str, int, float]] = dict(
        facecolor="white",
        dpi=300,
        bbox_inches="tight",
        pad_inches=0.05,
    )

In [3]:
# Build the notebook configuration with file saving switched on
# (the Config default for `savefile` is False).
conf = Config(savefile=True)

In [4]:
# Make sure the output directory exists; exist_ok=True keeps this cell safe
# to re-run when the directory is already there.
os.makedirs(conf.dirctory, exist_ok=True)

In [5]:
# Load both CSV files, using the first column of each file as the row index.
data_1, data_2 = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (conf.data_1, conf.data_2)
)

In [6]:
# Keep the rows of data_1 whose `size` is at least 2. The column is chosen by
# label rather than by position, so a reordered CSV cannot silently change
# which column gets thresholded.
df_1 = data_1[data_1["size"] >= 2]
# Relabel the rows day1_1..day1_N so they stay identifiable once they are
# stacked together with the rows from data_2.
df_1.index = [f"day1_{i+1}" for i in range(len(df_1))]
df_1.head()

Unnamed: 0,time,size
day1_1,5.13,2.0
day1_2,6.55,2.5
day1_3,5.36,2.97
day1_4,5.81,2.0
day1_5,4.81,2.5


In [7]:
# Select the rows of data_2 where `n_clip` equals 0 and relabel them
# day2_1..day2_N.
df_2 = data_2.loc[data_2["n_clip"] == 0]
df_2.index = [f"day2_{pos}" for pos in range(1, len(df_2) + 1)]
df_2.head()

Unnamed: 0,time,size,n_clip
day2_1,5.94,2.0,0.0
day2_2,5.75,2.5,0.0
day2_3,5.65,2.97,0.0
day2_4,5.44,2.0,0.0
day2_5,5.41,2.5,0.0


In [8]:
# Stack both frames row-wise. `n_clip` exists only in df_2, so it is dropped
# by name (not by position) to leave the same columns as df_1.
df = pd.concat([df_1, df_2.drop(columns="n_clip")], axis=0)

In [54]:
# Split the combined frame back into one frame per label prefix.
# `filter(..., axis=0)` matches the regex against the row index directly;
# transposing twice to filter on columns copies the data and can upcast a
# mixed-dtype frame to object.
df_sep = [
    df.filter(regex="day1_", axis=0),
    df.filter(regex="day2_", axis=0),
]

In [55]:
# Explanatory variable for the regression: the `size` column of the first
# split (rows labelled day1_), selected by label so it does not depend on
# the column order.
df_exog = df_sep[0]["size"]

In [56]:
# Response variable for the regression: the `time` column of the first split.
df_endog = df_sep[0]["time"]

In [57]:
# Sanity check: number of observations in the response series.
df_endog.shape

(15,)

In [58]:
# Add an intercept column to the regressor so the fit estimates a constant
# term (it appears as `const` in the regression summary).
x = sm.add_constant(df_exog)

In [59]:
# Ordinary least squares: `time` regressed on the constant plus `size`.
model = sm.OLS(endog=df_endog, exog=x)

In [60]:
# Fit the model; `result` holds the fitted results inspected by the cells below.
result = model.fit()

In [61]:
# Render the full regression summary (fit statistics, coefficients, diagnostics).
result.summary()



0,1,2,3
Dep. Variable:,time,R-squared:,0.023
Model:,OLS,Adj. R-squared:,-0.052
Method:,Least Squares,F-statistic:,0.3119
Date:,"Thu, 14 Oct 2021",Prob (F-statistic):,0.586
Time:,17:42:23,Log-Likelihood:,-18.957
No. Observations:,15,AIC:,41.91
Df Residuals:,13,BIC:,43.33
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,6.8979,1.512,4.563,0.001,3.632,10.164
size,-0.3349,0.600,-0.559,0.586,-1.630,0.961

0,1,2,3
Omnibus:,3.651,Durbin-Watson:,1.943
Prob(Omnibus):,0.161,Jarque-Bera (JB):,1.739
Skew:,0.813,Prob(JB):,0.419
Kurtosis:,3.375,Cond. No.,18.5


In [62]:
# Akaike information criterion of the fit at full precision
# (the summary table shows it rounded as "AIC").
result.aic

41.91346651783479

In [63]:
# Mean squared error of the fit: the average of the squared residuals (SSR / n).
# `.mean()` is required here — `.sum()` would give the residual sum of squares
# (available directly as `result.ssr`), which is not a mean.
# NOTE(review): if the degrees-of-freedom-corrected value (SSR / df_resid) is
# what is wanted, use `result.mse_resid` instead.
mse = (result.resid ** 2).mean()

In [64]:
# Design-matrix shape: observations x (constant + regressor).
x.shape

(15, 2)

In [66]:
# Model degrees of freedom (reported as "Df Model" in the summary).
model.df_model

1.0

In [67]:
# Log-likelihood of the fitted model at full precision
# (reported as "Log-Likelihood" in the summary).
result.llf

-18.956733258917396

In [68]:
from itertools import combinations

In [85]:
# Every unordered pair of column labels in df, as a list of 2-tuples.
l_comb = [*combinations(df.columns, 2)]

# One two-column frame per pair of labels.
df_separated = [df[list(pair)] for pair in l_comb]

In [86]:
# Inspect the per-pair frames.
# NOTE(review): this prints every row of every frame; consider showing only
# `.head()` of each to keep the output short.
df_separated

[         time  size
 day1_1   5.13  2.00
 day1_2   6.55  2.50
 day1_3   5.36  2.97
 day1_4   5.81  2.00
 day1_5   4.81  2.50
 day1_6   5.34  2.97
 day1_7   6.83  2.00
 day1_8   7.10  2.50
 day1_9   6.10  2.97
 day1_10  5.59  2.00
 day1_11  6.73  2.50
 day1_12  5.62  2.97
 day1_13  6.28  2.00
 day1_14  8.21  2.50
 day1_15  5.50  2.97
 day2_1   5.94  2.00
 day2_2   5.75  2.50
 day2_3   5.65  2.97
 day2_4   5.44  2.00
 day2_5   5.41  2.50
 day2_6   5.00  2.97
 day2_7   6.25  2.00
 day2_8   6.41  2.50
 day2_9   5.69  2.97
 day2_10  6.32  2.00
 day2_11  6.09  2.50
 day2_12  4.46  2.97
 day2_13  5.39  2.00
 day2_14  5.87  2.50
 day2_15  4.81  2.97]