In [3]:
import operator
from copy import copy
from scipy.special import softmax


class HMM_classifier():
    """Sequence classifier that trains one HMM per class label.

    A private copy of the base model is fitted on the sequences of each
    class. A sample is then scored by every per-class model, and the class
    whose model yields the highest score is predicted.

    The base model only needs a ``fit(X, lengths)`` method and a
    ``score(X)`` method.
    """

    def __init__(self, base_hmm_model):
        """
        base_hmm_model: unfitted model used as the template for every class
        """
        # Fitted per-class models, keyed by class label (filled by fit()).
        self.models = {}
        self.hmm_model = base_hmm_model

    def fit(self, X, Y):
        """
        X: input sequences [[[x1,x2,.., xn]...]]
        Y: output classes [1, 2, 1, ...]

        Fits one independent copy of the base model per distinct label in Y.
        Models from a previous call to fit() are discarded.
        """
        # Local import: the enclosing file only imports the shallow `copy`.
        from copy import deepcopy

        # Materialize the labels once so that Y may be any iterable.
        labels = list(Y)
        classes = set(labels)

        print("Detect classes:", classes)
        print("Prepare datasets...")

        # For each class: all of its observations concatenated into one list,
        # plus the length of every original sequence, so the model can split
        # the concatenation back into sequences.
        observations = {c: [] for c in classes}
        seq_lengths = {c: [] for c in classes}
        for x, y in zip(X, labels):
            observations[y].extend(x)
            seq_lengths[y].append(len(x))

        fitted = {}
        for c in classes:
            print("Fit HMM for", c, " class")
            # Deep copy: a shallow copy would share mutable attributes
            # between the per-class models and the caller's base model.
            hmm_model = deepcopy(self.hmm_model)
            hmm_model.fit(observations[c], seq_lengths[c])
            fitted[c] = hmm_model

        # Replace the whole dict in one step so that no model from an earlier
        # fit() with different labels survives, and so that a failing fit
        # leaves the previous models untouched.
        self.models = fitted

    def _predict_scores(self, X):
        """
        X: input sample [[x1,x2,.., xn]]
        Returns: dict with the score (log likelihood) per class
        """
        return {label: model.score(X) for label, model in self.models.items()}

    def predict_proba(self, X):
        """
        X: input sample [[x1,x2,.., xn]]
        Returns: dict with probabilities per class (softmax of the scores)

        Raises ValueError when no model has been fitted yet.
        """
        if not self.models:
            raise ValueError("HMM_classifier is not fitted; call fit() first")

        pred = self._predict_scores(X)

        keys = list(pred.keys())
        scores = softmax(list(pred.values()))

        return dict(zip(keys, scores))

    def predict(self, X):
        """
        X: input sample [[x1,x2,.., xn]]
        Returns: predicted class label (the class with the highest probability)
        """
        pred = self.predict_proba(X)

        return max(pred.items(), key=operator.itemgetter(1))[0]

In [6]:
from hmmlearn import hmm
import numpy as np

# Load the dataset; the first CSV column becomes the row index.
df = pd.read_csv("predictive_model_dataset.csv", index_col=0)

# Integer-encode every object-dtype (string) column in place.
# The same encoder object is refitted for each column, so after the loop
# `le` only holds the mapping of the last encoded column.
le = preprocessing.LabelEncoder()
for column_name in df.columns:
    if df[column_name].dtype == object:
        df[column_name] = le.fit_transform(df[column_name])

# Target and feature matrix.
y = df.prediction.astype('int')
features = ['Day','h_num','cSteps','h_tar','h_t_ac','daily_tar','Contextual_group','Activity Pattern']
X = df[features]

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=0.2)

# HMM_classifier.fit expects an iterable of sequences, each sequence being a
# list/array of observation vectors. Iterating a DataFrame yields its column
# labels, so the frame cannot be passed directly. Here every row becomes its
# own sequence of length 1 holding a single observation with all features.
# NOTE(review): grouping rows into longer sequences (e.g. per user or per
# Day) may be the intended modelling -- confirm.
X_train_seqs = [row.reshape(1, -1) for row in X_train.to_numpy()]
X_test_seqs = [row.reshape(1, -1) for row in X_test.to_numpy()]

# NOTE(review): whether hmm.MultinomialHMM accepts these feature values
# depends on the installed hmmlearn version -- confirm, or pick an emission
# model that matches the data.
model = HMM_classifier(hmm.MultinomialHMM())
# Fit on the training split only, so the held-out rows stay unseen.
model.fit(X_train_seqs, y_train)

# Predict probability per label for one held-out sample (it has the same
# number of features as the training data).
sample = X_test_seqs[0]
pred = model.predict_proba(sample)

# Get the label with the highest probability
pred_label = model.predict(sample)


Detect classes: {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
Prepare datasets...
Fit HMM for 0  class
Fit HMM for 1  class
Fit HMM for 2  class
Fit HMM for 3  class
Fit HMM for 4  class
Fit HMM for 5  class
Fit HMM for 6  class
Fit HMM for 7  class
Fit HMM for 8  class
Fit HMM for 9  class
done
