## Import libraries

In [102]:
import pandas as pd
import numpy as np
import json
from IOHMM import UnSupervisedIOHMM, SupervisedIOHMM
from IOHMM.family_wrapper import BinomialWrapper
from IOHMM import OLS, DiscreteMNL, CrossEntropyMNL, GLM

## Read and clean data

In [63]:
df = pd.read_csv('assistment_data_corrected.csv', encoding="ISO-8859-1")

In [64]:
df = df[df['overlap_time'] > 0]
df = df[df['ms_first_response'] > 0]

## Create model and set up parameters

In [107]:
hmm = SupervisedIOHMM(num_states=2)

In [108]:
hmm.set_inputs(covariates_initial=[], covariates_transition=[], covariates_emissions=[['overlap_time']])

In [109]:
hmm.set_models(model_emissions = [GLM(family=BinomialWrapper)], 
                model_transition=CrossEntropyMNL(solver='lbfgs'),
                model_initial=CrossEntropyMNL(solver='lbfgs'))

In [110]:
hmm.set_outputs([['correct']])

In [111]:
states = {}
correct = df['correct']

for i in range(len(correct)):
    try:
        state = np.zeros((2,))
        if correct[i] == 1:
            states[i] = np.array([0,1])
        else:
            states[i] = np.array([1,0])
    except:
        pass
        
hmm.set_data([[df, states]])
# for unsupervised, hmm.set_data([[df]])

## Train the model

The below cell runs the EM algorithm for 100 iterations. This takes a long time (~1h)!

In [112]:
hmm.train()

AttributeError: type object 'BinomialWrapper' has no attribute 'family'

## Model parameters

In [78]:
# The coefficients of the OLS model for each hidden states
print(hmm.model_emissions[0][0].coef)
print(hmm.model_emissions[1][0].coef)

[[ 5.72780849e-01 -5.17451376e-08]]
[[ 6.88085945e-01 -7.60489439e-08]]


In [79]:
# The scale/dispersion of the OLS model for each hidden states
print(np.sqrt(hmm.model_emissions[0][0].dispersion))
print(np.sqrt(hmm.model_emissions[1][0].dispersion))

[[0.49464117]]
[[0.4642616]]


In [80]:
# The transition probability between two hidden states
print(np.exp(hmm.model_transition[0].predict_log_proba(np.array([[]]))))
print(np.exp(hmm.model_transition[1].predict_log_proba(np.array([[]]))))

[[0.52091422 0.47908578]]
[[0.26612528 0.73387472]]


In [87]:
hmm.model_emissions[0][0].

AttributeError: 'OLS' object has no attribute 'score'

## Save model to file

In [15]:
json_dict = hmm.to_json('./')
with open('./model.json', 'w') as outfile:
    json.dump(json_dict, outfile, indent=4, sort_keys=True)

## Load back from file

In [10]:
hmm = UnSupervisedIOHMM.from_json(json.load(open('model.json')))

In [48]:
vars(hmm.model_emissions[0][0])

{'solver': 'svd',
 'fit_intercept': True,
 'est_stderr': False,
 'tol': 0.0001,
 'max_iter': 100,
 'reg_method': None,
 'alpha': 0,
 'l1_ratio': 0,
 'coef': array([[1., 0.]]),
 'stderr': array(None, dtype=object),
 'dispersion': array([[1.66862864e-27]]),
 'n_targets': 1,
 '_model': LinearRegression(copy_X=True, fit_intercept=False, n_jobs=None,
          normalize=False)}

In [61]:
hmm.model_emissions[0][0].predict([[-13434.545], [45]])

array([[1.],
       [1.]])