In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import multivariate_normal,norm
import math
from hmmlearn import hmm
import numpy.random as npr
from pyslds.models import DefaultSLDS
from pylds.util import random_rotation


In [None]:
class HMM(object):
    """Hidden Markov model over K discrete states with a tabular emission matrix.

    The constructor only stores its arguments; `forward_backward` performs the
    scaled forward-backward pass and stores the posterior marginals.
    """

    def __init__(self,K = None, T=None, transition_dis = None, init_prob = None, emission = None, y=None):
        """Store the model parameters.

        Args:
            K: number of hidden discrete states.
            T: number of observations.
            transition_dis: K x K transition matrix; entry [i, j] is the
                probability of moving from state i to state j.
            init_prob: length-K initial state distribution.
            emission: emission table whose column `data[t]` holds the
                likelihood of observation t under each of the K states.
            y: optional T x N observation array; only its second dimension is
                read here (stored as `N`).
        """
        self.y = y # T x N
        self.K = K # dimension of hidden discrete states
        self.T = T # number of observations
        # y is optional: without it the observation dimension is simply unknown.
        self.N = None if y is None else np.shape(y)[1] # dimension of the observations

        self.transition_dis = transition_dis # the discrete variable transition probability matrix, K x K
        self.init_prob = init_prob # the initial probability of the discrete variable, K

        self.emission = emission # the emission probability of the hidden variable, K x N

    def forward_backward(self, data):
        """Run the scaled forward-backward recursions for one observation sequence.

        Args:
            data: sequence of observation indices; `data[t]` selects a column
                of `self.emission`.

        Sets:
            self.a: T x K posterior state marginals (each row sums to 1).
            self.b: T x K x K posterior pairwise marginals for the transition
                t -> t+1; the last slice is left at zero because there is no
                transition out of the final step.
            self.scale_factors: length-T normalisers of the forward pass; the
                sum of their logs is the log-likelihood of `data`.

        Raises:
            ValueError: if an observation has zero probability under the
                model, which would otherwise produce NaNs silently.
        """
        trans = np.asarray(self.transition_dis, dtype=float)
        emis = np.asarray(self.emission, dtype=float)
        prior = np.asarray(self.init_prob, dtype=float)
        n_steps = len(data) if self.T is None else self.T
        n_states = trans.shape[0] if self.K is None else self.K

        forward_hat = np.zeros((n_steps, n_states))
        backward_hat = np.zeros((n_steps, n_states))
        scale_factors = np.zeros(n_steps)

        # Forward pass: each row is normalised and the normaliser is kept.
        unnormalised = prior * emis[:, data[0]]
        for t in range(n_steps):
            if t > 0:
                unnormalised = np.matmul(forward_hat[t-1], trans) * emis[:, data[t]]
            scale_factors[t] = np.sum(unnormalised)
            if not scale_factors[t] > 0:
                raise ValueError(f"Observation at step {t} has zero probability under the model.")
            forward_hat[t] = unnormalised / scale_factors[t]

        # Backward pass: the step-t message is divided by the scale factor of
        # step t+1, so that forward_hat * backward_hat is already normalised.
        backward_hat[-1] = 1.0
        for t in reversed(range(n_steps - 1)):
            weighted = backward_hat[t+1] * emis[:, data[t+1]]
            backward_hat[t] = np.matmul(trans, weighted) / scale_factors[t+1]

        a = forward_hat * backward_hat
        a = a / np.sum(a, axis=1, keepdims=True)  # guards against round-off drift

        b = np.zeros((n_steps, n_states, n_states))
        for t in range(n_steps - 1):
            weighted = backward_hat[t+1] * emis[:, data[t+1]]
            b[t] = forward_hat[t][:, None] * trans * weighted[None, :] / scale_factors[t+1]

        self.a = a
        self.b = b
        self.scale_factors = scale_factors

In [None]:
class timeVaryingLDS(object):
    """Kalman filter and RTS smoother for a linear-Gaussian state-space model.

    Model: x[t] = A[t] x[t-1] + w[t], w ~ N(0, Q[t]);  y[t] = C[t] x[t] + v[t],
    v ~ N(0, R[t]);  x[0] ~ N(u0, V0).

    Each of A, Q, C, R may be a single matrix shared by every step or a stack
    with a leading time axis (T x ...). For stacks, A[t] and Q[t] describe the
    transition into step t, so index 0 of A and Q is never read.
    """

    def __init__(self,T=None, transition_con = None, emission = None, Q = None, R = None, u0 = None, V0 = None, y=None):
        """Store parameters and allocate the filter / smoother buffers.

        Args:
            T: number of observations; defaults to the length of `y`.
            transition_con: state transition matrix, M x M or T x M x M.
            emission: observation matrix, N x M or T x N x M.
            Q: state noise covariance, M x M or T x M x M.
            R: observation noise covariance, N x N or T x N x N; identity
                when omitted.
            u0: mean of the first hidden state, length M.
            V0: covariance of the first hidden state, M x M.
            y: observations, T x N.

        Raises:
            ValueError: if `transition_con` or `y` is missing.
        """
        if transition_con is None or y is None:
            raise ValueError("transition_con and y are required.")

        self.y = y # T x N
        self.M = np.shape(transition_con)[1] # dimension of hidden continuous states
        self.T = np.shape(y)[0] if T is None else T # number of observations
        self.N = np.shape(y)[1] # dimension of the observations

        self.C = emission # the emission probability of the continuous variable, N x M
        self.A = transition_con # the continuous variable transition probability matrix, M x M

        self.Q = Q # Q is the covariance matrix of noise term added to the hidden continuous state transition, M x M
        self.R = np.eye(self.N) if R is None else R # R is the covariance matrix of noise term added to the emission, N x N

        self.u0 = u0 # u0 is the initial estimate of the mean of x1, M
        self.V0 = V0 # V0 is the initial estimate of the variance of x1, M x M

        # Filter outputs. P[t] and K[t] belong to the prediction of step t+1
        # made at step t, which is the form the smoother consumes.
        self.P = np.zeros((self.T, self.M, self.M))
        self.P[:] = np.eye(self.M)  # predicted covariances, T x M x M
        self.u = np.zeros((self.T, self.M)) # filtered means, T x M
        self.V = np.zeros((self.T, self.M, self.M)) # filtered covariances, T x M x M
        self.K = np.zeros((self.T, self.M, self.N)) # Kalman gains, T x M x N
        self.c = np.zeros((self.T)) # predictive densities of each observation, T

        # for backward passing
        self.u_hat = np.zeros((self.T, self.M)) # smoothed means, T x M
        self.V_hat = np.zeros((self.T, self.M, self.M)) # smoothed covariances, T x M x M
        self.J = np.zeros((self.T, self.M, self.M)) # smoother gains, T x M x M

    @staticmethod
    def _at(param, t):
        """Return the matrix that applies at step t.

        A 2-D parameter is shared by all steps; a 3-D one is indexed by time,
        with t clamped to the last available slice.
        """
        param = np.asarray(param)
        if param.ndim == 3:
            return param[min(t, param.shape[0] - 1)]
        return param

    def kalman_filtering(self):
        """Forward pass: fill `u`, `V`, `P`, `K` and `c`.

        `c[t]` is the Gaussian density of y[t] given y[0..t-1], so
        `np.sum(np.log(c))` is the log-likelihood of the sequence.
        """
        if self.T == 0:
            return

        identity = np.eye(self.M)
        pred_mean = np.asarray(self.u0, dtype=float).reshape(self.M)
        pred_cov = np.asarray(self.V0, dtype=float)

        for t in range(self.T):
            C_t = self._at(self.C, t)
            R_t = self._at(self.R, t)

            obs_mean = np.matmul(C_t, pred_mean)
            innovation_cov = np.matmul(np.matmul(C_t, pred_cov), C_t.T) + R_t
            gain = np.matmul(np.matmul(pred_cov, C_t.T), np.linalg.inv(innovation_cov))
            if t > 0:
                self.K[t-1] = gain  # gain applied at step t, stored with its prediction

            # Measurement update with the gain and covariance of this same step.
            self.u[t] = pred_mean + np.matmul(gain, self.y[t] - obs_mean)
            self.V[t] = np.matmul(identity - np.matmul(gain, C_t), pred_cov)
            self.c[t] = multivariate_normal.pdf(self.y[t], obs_mean, innovation_cov)

            # Time update: one-step-ahead prediction for step t+1.
            A_next = self._at(self.A, t + 1)
            Q_next = self._at(self.Q, t + 1)
            pred_mean = np.matmul(A_next, self.u[t])
            pred_cov = np.matmul(np.matmul(A_next, self.V[t]), A_next.T) + Q_next
            self.P[t] = pred_cov

        # Gain for the step after the last observation (time-indexed
        # parameters reuse their final slice here).
        C_last = self._at(self.C, self.T)
        R_last = self._at(self.R, self.T)
        self.K[-1] = np.matmul(
            np.matmul(pred_cov, C_last.T),
            np.linalg.inv(np.matmul(np.matmul(C_last, pred_cov), C_last.T) + R_last))

    def kalman_smoothing(self):
        """Backward (RTS) pass: fill `u_hat`, `V_hat` and `J`.

        Must run after `kalman_filtering`. Only the smoothed moments are
        produced; no M-step statistics are accumulated here.
        """
        if self.T == 0:
            return

        self.u_hat[-1] = self.u[-1]
        self.V_hat[-1] = self.V[-1]

        for t in range(self.T-2,-1,-1):
            A_next = self._at(self.A, t + 1)
            self.J[t] = np.matmul(np.matmul(self.V[t], A_next.T), np.linalg.inv(self.P[t]))
            self.u_hat[t] = self.u[t] + np.matmul(self.J[t], self.u_hat[t+1] - np.matmul(A_next, self.u[t]))
            self.V_hat[t] = self.V[t] + np.matmul(np.matmul(self.J[t], self.V_hat[t+1] - self.P[t]), self.J[t].T)

In [None]:
class SLDS(object):
    """Switching linear dynamical system with K regimes.

    The constructor builds an `HMM` for the discrete regime and a
    `timeVaryingLDS` whose per-step parameters are the responsibility-weighted
    "effective" parameters returned by `calculate_effective_con`, then runs
    the Kalman filter and smoother once on that effective model.
    """

    def __init__(self, K = None, T=None, transition_dis = None, init_prob = None, transition_con = None, emission = None, Q = None, R = None, u0 = None, V0 = None, y=None):
        """Store the per-regime parameters and run one continuous-state E-step.

        Args:
            K: number of regimes; defaults to the size of `transition_dis`.
            T: number of observations; defaults to the length of `y`.
            transition_dis: K x K regime transition matrix.
            init_prob: length-K initial regime distribution.
            transition_con: per-regime state transition matrices, K x M x M.
            emission: per-regime observation matrices, K x N x M.
            Q: per-regime state noise covariances, K x M x M.
            R: observation noise covariance, K x N x N, or a single N x N
                matrix shared by all regimes; identity when omitted.
            u0: per-regime mean of the first hidden state, K x M.
            V0: per-regime covariance of the first hidden state, K x M x M.
            y: observations, T x N.

        Raises:
            ValueError: if the dynamics or the observations are missing.
        """
        if(transition_con is None or transition_dis is None or emission is None):
            raise ValueError("Set proper system dynamics.")
        if y is None:
            raise ValueError("Observations y are required.")

        self.y = y # T x N
        self.K = np.shape(transition_dis)[0] if K is None else K
        self.T = np.shape(y)[0] if T is None else T
        self.N = np.shape(y)[1]
        self.M = np.shape(transition_con)[1]
        self.C = emission # the emission probability of the continuous variable, K x N x M
        self.A = transition_con # the continuous variable transition probability matrix, K x M x M

        self.Q = Q # Q is the covariance matrix of noise term added to the hidden continuous state transition, K x M x M
        # R is indexed per regime below, so a shared (or default) N x N matrix
        # is repeated along a leading regime axis.
        if R is None:
            R = np.eye(self.N)
        R = np.asarray(R, dtype=float)
        self.R = np.tile(R, (self.K, 1, 1)) if R.ndim == 2 else R # K x N x N

        self.u0 = u0 # u0 is the initial estimate of the mean of x1, K x M
        self.V0 = V0 # V0 is the initial estimate of the variance of x1, K x M x M

        self.hmm = HMM(K = self.K, T=self.T, transition_dis = transition_dis, init_prob = init_prob, emission = None, y=self.y)
        # No discrete E-step has run at this point, so start from uniform
        # regime responsibilities; calculate_effective_con reads them.
        if getattr(self.hmm, "a", None) is None:
            self.hmm.a = np.full((self.T, self.K), 1.0 / self.K)

        R_hat, C_hat, Q_hat, A_hat, V0_hat, u0_hat = self.calculate_effective_con()
        self.lds = timeVaryingLDS(T=self.T, transition_con = A_hat, emission = C_hat, Q = Q_hat, R = R_hat, u0 = u0_hat, V0 = V0_hat, y=y)
        self.lds.kalman_filtering()
        self.lds.kalman_smoothing()

    def calculate_effective_con(self):
        """Collapse the K regimes into one time-varying linear-Gaussian model.

        The per-regime Gaussian factors are averaged in natural-parameter
        (precision) form, weighted by the regime responsibilities
        `self.hmm.a[t, k]`, and then rewritten as a single set of per-step
        matrices:

            R_hat[t]^-1          = sum_k a[t,k] R_k^-1
            R_hat[t]^-1 C_hat[t] = sum_k a[t,k] R_k^-1 C_k
            Q_hat[t]^-1          = sum_k a[t,k] Q_k^-1
                                   + (sum_k a[t,k] C_k' R_k^-1 C_k - C_hat[t]' R_hat[t]^-1 C_hat[t])
                                   + (sum_k a[t+1,k] A_k' Q_k^-1 A_k - A_hat[t+1]' Q_hat[t+1]^-1 A_hat[t+1])
            Q_hat[t]^-1 A_hat[t] = sum_k a[t,k] Q_k^-1 A_k

        The last line of Q_hat is absent for the final step, and step 0 uses
        V0_k / u0_k in place of Q_k / A_k to give V0_hat / u0_hat.

        Returns:
            (R_hat, C_hat, Q_hat, A_hat, V0_hat, u0_hat) with shapes
            T x N x N, T x N x M, T x M x M, T x M x M, M x M and M.
            Slice 0 of Q_hat and A_hat is left at zero: there is no
            transition into the first step.
        """
        K = self.hmm.K
        T = self.hmm.T
        N = self.N
        M = self.M
        resp = self.hmm.a

        # Per-regime building blocks, computed once.
        R_inv = [np.linalg.inv(self.R[k]) for k in range(K)]
        Q_inv = [np.linalg.inv(self.Q[k]) for k in range(K)]
        V0_inv = [np.linalg.inv(self.V0[k]) for k in range(K)]
        R_inv_C = [R_inv[k] @ self.C[k] for k in range(K)]
        Ct_R_inv_C = [self.C[k].T @ R_inv_C[k] for k in range(K)]
        Q_inv_A = [Q_inv[k] @ self.A[k] for k in range(K)]
        At_Q_inv_A = [self.A[k].T @ Q_inv_A[k] for k in range(K)]

        def mix(t, blocks):
            """Responsibility-weighted sum of per-regime matrices at step t."""
            return sum(resp[t, k] * blocks[k] for k in range(K))

        R_hat = np.zeros((T,N,N))# T x N x N
        C_hat = np.zeros((T,N,M)) # T x N x M
        Q_hat = np.zeros((T, M, M)) # T x M x M
        A_hat = np.zeros((T,M,M)) # T x M x M
        obs_excess = np.zeros((T, M, M))

        for t in range(T):
            R_prec = mix(t, R_inv)
            R_hat[t] = np.linalg.inv(R_prec)
            C_hat[t] = R_hat[t] @ mix(t, R_inv_C)
            # Curvature in x[t] from the emissions that the effective
            # emission term does not account for.
            obs_excess[t] = mix(t, Ct_R_inv_C) - C_hat[t].T @ R_prec @ C_hat[t]

        # Backward sweep: step t needs the effective transition of step t+1.
        trans_excess = np.zeros((M, M))
        for t in range(T-1,0,-1):
            Q_prec = mix(t, Q_inv) + obs_excess[t] + trans_excess
            Q_hat[t] = np.linalg.inv(Q_prec)
            A_hat[t] = Q_hat[t] @ mix(t, Q_inv_A)
            trans_excess = mix(t, At_Q_inv_A) - A_hat[t].T @ Q_prec @ A_hat[t]

        V0_prec = mix(0, V0_inv) + obs_excess[0] + trans_excess
        V0_hat = np.linalg.inv(V0_prec)
        u0_hat = V0_hat @ sum(resp[0, k] * (V0_inv[k] @ np.ravel(self.u0[k])) for k in range(K))
        return R_hat, C_hat, Q_hat, A_hat, V0_hat, u0_hat

    def learning(self):
        """Draft M-step; its statements are kept as written.

        NOTE(review): this method reads `self.Z`, `self.u_hat`, `self.V_hat`,
        `self.J`, `self.forward`, `self.backward`, `self.q`,
        `self.transition_dis`, `self.transition_con`, `self.emission` and
        `self.init_prob`, none of which `__init__` sets, so calling it raises
        AttributeError. It needs to be rewritten against `self.hmm` and
        `self.lds` before use.
        NOTE(review): the state-noise update is computed into `temp` and never
        stored, and the V0 update adds and subtracts the same outer product.
        """
        Sigma = np.zeros((self.N,self.N))
        for k in range(self.Z):
            self.u0[k] = self.u_hat[0,k]
            self.V0[k] = self.V_hat[0,k] + np.outer(self.u_hat[0,k], self.u_hat[0,k].T) - np.outer(self.u_hat[0,k], self.u_hat[0,k].T)

            # E[z[n]] : M x 1
            # E[z[n]z[n-1].T] : M x M
            # E[z[n]z[n].T] : M x M
            a = np.zeros((self.T,self.Z))
            b = np.zeros((self.T,self.Z,self.Z))
            for i in range(self.T):
                for j in range(self.Z):
                    a[i,j] = self.forward[i,j] * self.backward[i,j]
                temp = np.sum(a[i,:])
                a[i,:] = a[i,:]/temp

            for t in range(self.T - 1):
                for i in range(self.Z):
                    for j in range(self.Z):
                        b[t,i,j] = self.forward[t,i] * self.backward[t+1,j] * self.transition_dis[i,j] * self.q[t+1,i]


            sub_1 = np.zeros((self.M,self.M))
            sub_2 = np.zeros((self.M,self.M))
            sub_3 = np.zeros((self.M,self.M))
            sub_4 = np.zeros((self.M,self.M))

            sub_5 = np.zeros((self.N,self.M))
            sub_6 = np.zeros((self.M,self.M))
            sub_7 = np.zeros((self.N,self.N))
            sub_8 = np.zeros((self.M,self.N))

            for i in range(1,self.T,1):
                sub_1 += np.matmul(self.V_hat[i,k],self.J[i-1,k].T) + np.outer(self.u_hat[i,k],self.u_hat[i-1,k].T) # z[n]z[n-1]

                sub_2 += self.V_hat[i-1,k] + np.outer(self.u_hat[i-1,k], self.u_hat[i-1,k].T)

                sub_3 += self.V_hat[i,k] + np.outer(self.u_hat[i,k], self.u_hat[i,k].T) # z[n]z[n]
                sub_4 += (np.matmul(self.V_hat[i,k],self.J[i-1,k].T) + np.outer(self.u_hat[i,k],self.u_hat[i-1,k].T)).T #z[n-1]z[n]

            for i in range(self.T):
                sub_5 += a[i,k] * np.outer(self.y[i], self.u_hat[i,k].T) # x[n] * E[z[n]].T
                sub_6 += a[i,k] * self.V_hat[i,k] + np.outer(self.u_hat[i,k], self.u_hat[i,k].T) # z[n]z[n]
                sub_7 += a[i,k] * np.outer(self.y[i], self.y[i].T) # x[n]x[n]
                sub_8 += a[i,k] * np.outer(self.u_hat[i,k], self.y[i].T) #E[z[n]] * x[n].T


            self.transition_con[k] = np.matmul(sub_1, np.linalg.inv(sub_2))
            temp = 1/(self.T-1) * (sub_3 - np.matmul(self.transition_con[k],sub_4) - np.matmul(sub_1,self.transition_con[k].T) + np.matmul(np.matmul(self.transition_con[k],sub_2),self.transition_con[k].T))

            self.emission[k] = np.matmul(sub_5, np.linalg.inv(sub_6))
            Sigma += 1/self.T * (sub_7 - np.matmul(self.emission[k],sub_8) - np.matmul(sub_5,self.emission[k].T) +
                                 np.matmul(np.matmul(self.emission[k],sub_6),self.emission[k].T))
            self.Sigma = Sigma / self.Z



            for i in range(self.Z):
                for j in range(self.Z):
                    self.transition_dis[i,j] = np.sum(b[0:-1,i,j])/np.sum(b[0:-1,i,:])

            for i in range(self.Z):
                self.init_prob[i] = a[0,i]/np.sum(a[0,:])


In [None]:
def main(seed=None):
    """Simulate data from a reference SLDS and fit this notebook's `SLDS` to it.

    Args:
        seed: optional seed for numpy's global random state, so the simulated
            data can be reproduced. Left unseeded when omitted.

    Returns:
        (slds, states_dis, states_con, obs): the fitted model followed by the
        three arrays returned by `true_model.generate`, in swapped order.
    """
    if seed is not None:
        npr.seed(seed)

    p_old = -10000
    tol = 0.01
    max_iter = 10

    n_dis = 2               # Number of discrete latent states
    n_obs = 2               # Observed data dimension
    n_con = 2               # Latent state dimension
    D_input = 0             # Exogenous input dimension
    n_time = 2000           # Number of time steps to simulate

    true_mu_inits = [np.ones(n_con) for _ in range(n_dis)]
    true_sigma_inits = [np.eye(n_con) for _ in range(n_dis)]
    true_As = [.9 * random_rotation(n_con)
            for k in range(n_dis)]
    true_Bs = [3 * npr.randn(n_con, D_input) for k in range(n_dis)]
    true_sigma_states = [np.eye(n_con) for _ in range(n_dis)]
    true_C = np.random.randn(n_obs, n_con)
    true_Ds = np.zeros((n_obs, D_input))
    true_sigma_obs = np.eye(n_obs)

    true_model = DefaultSLDS(n_dis, n_obs, n_con, D_input,mu_inits=true_mu_inits, sigma_inits=true_sigma_inits,
        As=true_As, Bs=true_Bs, sigma_statess=true_sigma_states,
        Cs=true_C, Ds=true_Ds, sigma_obss=true_sigma_obs)

    inputs = npr.randn(n_time, D_input)
    # Fixed regime path: equal-length consecutive runs of each regime.
    z = np.arange(n_dis).repeat(n_time // n_dis)

    obs, states_con, states_dis = true_model.generate(n_time, inputs=inputs, stateseq=z)

    transition_dis_init = np.array([[0.7,0.3],[0.3,0.7]])
    init_prob_init = np.array([0.9,0.1])

    emission_init = np.array([[[0.3, 0.7],[0.2, 0.8]],[[0.25, 0.75],[0.4, 0.6]]]) # K x N x M
    transition_con_init = np.array([[[0.9, 0.1],[0.1, 0.9]],[[0.1, 0.9],[0.1, 0.9]]]) # K x M x M

    Gamma_init = np.array([[[1.0, 0],[0, 1.0]],[[1.0, 0],[0, 1.0]]]) # K x M x M
    Sigma_init = np.array([[1.0, 0],[0, 1.0]]) # N x N
    u0 = np.array([[0.3, 0.3],[0.5, 0.5]]) # K x M
    V0 = Gamma_init.copy() # K x M x M; copied so the two arrays are never updated through one another

    # SLDS names the noise covariances Q and R and indexes R per regime, so
    # the shared observation covariance is repeated for each regime.
    slds = SLDS(K = n_dis, T=n_time, transition_dis = transition_dis_init, init_prob = init_prob_init, transition_con = transition_con_init,
             emission = emission_init, Q = Gamma_init, R = np.tile(Sigma_init, (n_dis, 1, 1)), u0 = u0, V0 = V0, y=obs)

    for ite in range(max_iter):

        # The continuous-state inference lives on the wrapped LDS.
        slds.lds.kalman_filtering()
        slds.lds.kalman_smoothing()
        # TODO(review): the regime responsibilities are not re-estimated in
        # this loop. SLDS has no forward_backward method, and
        # HMM.forward_backward needs an emission table plus an index
        # sequence, which nothing here builds.
        slds.learning()

        p = np.sum(np.log(slds.lds.c))
        print(f'The current iteration is: {ite}. The likelihood is {p}',end='\r')
        if p>p_old and p - p_old < tol:
            break
        p_old = p

    print('u0\n',slds.u0,'\nV0\n',slds.V0,'\ntransition_dis\n',slds.hmm.transition_dis,'\ntransition_con\n',slds.A,'\nemission\n',slds.C,'\nGamma\n',
       slds.Q,'\ninit_prob\n',slds.hmm.init_prob,'\nSigma\n',slds.R)
    return slds,states_dis,states_con,obs

slds,states_dis,states_con,obs = main()

In [14]:

# Count down from 5 to 1, printing one value per line.
for i in reversed(range(1, 6)):
    print(i)

5
4
3
2
1
