## Import libraries

In [14]:
import pandas as pd
import numpy as np
import json
from IOHMM import UnSupervisedIOHMM
from IOHMM import OLS, DiscreteMNL, CrossEntropyMNL

## Read and clean data

In [2]:
# Read the raw CSV; the file is decoded as ISO-8859-1 rather than the default UTF-8.
csv_path = 'assistment_data_corrected.csv'
df = pd.read_csv(csv_path, encoding="ISO-8859-1")

In [3]:
# Keep only rows whose timing columns are strictly positive.
# The filters are applied one after another, in the order listed.
for positive_col in ('overlap_time', 'ms_first_response'):
    df = df[df[positive_col] > 0]

## Create model and set up parameters

In [4]:
# Unsupervised IOHMM with two hidden states; all other constructor
# arguments are left at the library defaults.
n_hidden_states = 2
hmm = UnSupervisedIOHMM(num_states=n_hidden_states)

In [5]:
# No covariates for the initial-state or transition models; the single
# emission model takes 'overlap_time' as its only covariate.
emission_covariates = [['overlap_time']]
hmm.set_inputs(
    covariates_initial=[],
    covariates_transition=[],
    covariates_emissions=emission_covariates,
)

In [6]:
# One OLS emission model; the transition and initial-state models are
# both cross-entropy multinomial logits fitted with the 'lbfgs' solver.
emission_models = [OLS()]
transition_model = CrossEntropyMNL(solver='lbfgs')
initial_model = CrossEntropyMNL(solver='lbfgs')
hmm.set_models(
    model_emissions=emission_models,
    model_transition=transition_model,
    model_initial=initial_model,
)

In [7]:
# The single emission model has one response column: 'correct'.
emission_responses = [['correct']]
hmm.set_outputs(emission_responses)

In [8]:
# Hand the filtered frame to the model as a one-element list.
# NOTE(review): presumably each list element is treated as one sequence, so
# the whole frame is modelled as a single sequence here -- confirm whether
# it should be split (e.g. per student) before training.
training_frames = [df]
hmm.set_data(training_frames)

## Train the model

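The cell below runs the EM algorithm for at most 100 iterations (the iteration cap in effect, since none is set explicitly; training stops earlier if it converges). This takes a long time (~1h)!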

In [9]:
# Fit the model on the data registered with set_data(); this is the slow step
# of the notebook, so avoid re-running it unnecessarily.
hmm.train()

## Model parameters

In [11]:
# Coefficients of the fitted OLS emission model, printed once per hidden state
for state in (0, 1):
    print(hmm.model_emissions[state][0].coef)

[[1. 0.]]
[[0. 0.]]


In [12]:
# Scale (square root of the dispersion) of the OLS emission model,
# printed once per hidden state
for state in (0, 1):
    state_dispersion = hmm.model_emissions[state][0].dispersion
    print(np.sqrt(state_dispersion))

[[4.08488512e-14]]
[[0.]]


In [13]:
# Transition probabilities out of each hidden state: one printed row per
# source state. The transition model has no covariates, so it is evaluated
# on a single empty input row.
no_covariates = np.array([[]])
for state in (0, 1):
    log_proba = hmm.model_transition[state].predict_log_proba(no_covariates)
    print(np.exp(log_proba))

[[0.73395324 0.26604676]]
[[0.47911331 0.52088669]]


## Save model to file

In [15]:
# Export the trained model and store its description in ./model.json.
# NOTE(review): the './' argument is presumably the directory where to_json
# places any auxiliary files -- confirm against the IOHMM documentation.
json_dict = hmm.to_json('./')
with open('./model.json', 'w') as model_file:
    # Keys are sorted and indented by 4 so the file is stable and readable.
    serialized = json.dumps(json_dict, indent=4, sort_keys=True)
    model_file.write(serialized)