# Load files

In [71]:
import pandas as pd
import numpy as np
from derived import categories_derived
data_diam = pd.read_csv('data.csv', dtype =  np.int64)
print(f'data_diam:\n{data_diam}')
cat_derivied = categories_derived(data_diam['amount of sleep'])
data_diam['amount of sleep'] = cat_derivied
print(type(data_diam['amount of sleep']))
print(data_diam['amount of sleep'].to_numpy())

data_diam:
   mood  amount of sleep  physical activity  cognative load  water  \
0     4                9                  5               2      2   
1     5                6                  1               3      1   
2     4               10                  4               2      2   
3     1                8                  4               5      4   
4     2                7                  1               3      4   
5     3                8                  2               2      4   

   quality of sleep  
0                 5  
1                 3  
2                 4  
3                 2  
4                 2  
5                 3  
boundaries: [ 7  8  9 10]
<class 'pandas.core.series.Series'>
[4 1 5 3 2 3]


# Ordinal logistic regression - playground

test logistic regression on derived modd and basic amount sleep phiscal activity and water

The method determines which solver from scipy.optimize is used, and it can be chosen from among the following strings:

*    ‘newton’ for Newton-Raphson, ‘nm’ for Nelder-Mead

*    ‘bfgs’ for Broyden-Fletcher-Goldfarb-Shanno (BFGS)

*   ‘lbfgs’ for limited-memory BFGS with optional box constraints

*    ‘powell’ for modified Powell’s method

*    ‘cg’ for conjugate gradient

*   ‘ncg’ for Newton-conjugate gradient

*   ‘basinhopping’ for global basin-hopping solver

*   ‘minimize’ for generic wrapper of scipy minimize (BFGS by default)


In [72]:
from statsmodels.miscmodels.ordinal_model import OrderedModel
import numpy as np
np.seterr(divide='ignore', invalid='ignore')

#print(data_diam)

# generate logistic regression
mod_prob = OrderedModel(data_diam['mood'],
                        data_diam[['amount of sleep',  'physical activity', 'water']],
                        distr='logit')
 
res_log = mod_prob.fit(method='bfgs')
print(res_log.summary())

Optimization terminated successfully.
         Current function value: 0.000002
         Iterations: 112
         Function evaluations: 120
         Gradient evaluations: 120
                             OrderedModel Results                             
Dep. Variable:                   mood   Log-Likelihood:            -1.0450e-05
Model:                   OrderedModel   AIC:                             14.00
Method:            Maximum Likelihood   BIC:                             12.54
Date:                Sat, 14 May 2022                                         
Time:                        16:19:28                                         
No. Observations:                   6                                         
Df Residuals:                      -1                                         
Df Model:                           7                                         
                        coef    std err          z      P>|z|      [0.025      0.975]
----------------------------

# check t value to p value

In [73]:
pred_row = res_log.predict(data_diam[['amount of sleep',  'physical activity', 'water']]).round(2)
print(f'pred_row\n{pred_row}')
pred = np.where(pred_row == 1)[1]
print(f'pred\n{pred}')
real_value = data_diam['mood'].to_numpy()
print(f'real_value\n{real_value}')
# Independent t-test: https://www.pythonfordatascience.org/independent-samples-t-test-python/ 
from scipy.stats import ttest_ind
# Calculate the T-test for the means of two independent samples of scores.
(tvalue, pvalue) = ttest_ind(real_value, pred)
print(f'tvalue\n{tvalue}')
print(f'pvalue\n{pvalue}')

pred_row
     0    1    2    3    4
0  0.0  0.0  0.0  1.0  0.0
1  0.0  0.0  0.0  0.0  1.0
2  0.0  0.0  0.0  1.0  0.0
3  1.0  0.0  0.0  0.0  0.0
4  0.0  1.0  0.0  0.0  0.0
5  0.0  0.0  1.0  0.0  0.0
pred
[3 4 3 0 1 2]
real_value
[4 5 4 1 2 3]
tvalue
1.1766968108291043
pvalue
0.2665662286967403
