In [22]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_spd_matrix  #to generate covariance matrices
from numpy.linalg import det
from scipy.stats import special_ortho_group

# Fix the seed of NumPy's global random generator so that the random draws
# made later in this notebook (e.g. np.random.choice) are reproducible.
seed = 123
np.random.seed(seed)

In [23]:
# Load the simulation arrays from disk.
X_data = np.load('./simulation/x.npy')
z_data = np.load('./simulation/z.npy')

# Problem sizes, all derived from the loaded arrays.
M, T = X_data.shape[0], X_data.shape[1]   # rows / columns of X_data
K = len(np.unique(z_data))                # number of distinct values in z_data

## INFERENCE AND GIBBS SAMPLER

$\pi_k$ is the $k^{th}$ row of the transition matrix and it is a vector containing the probabilities to reach another discrete state starting from $z = k$.\
Thus, it is a vector of the type: $\pi_k = (p_{k,0}, p_{k,1},\dots,p_{k,K-2},p_{k,K-1})$. 

The problem we are facing is conceptually similar to the *mixture of gaussians model* apart from the fact that there are $K$ vectors like $\pi_k$, while in the *mixture of gaussians model* there is only one.
So, we can write $P(z_t = q | z_{t-1} = k) = p_{k,q}$ or $P(z_t | z_{t-1} = k) = cat(\pi_k)$ for all $k$s. 

We choose as prior for every $\pi_k$ a Dirichlet distribution i.e. the conjugate prior of the categorical:
$\boldsymbol{\pi}_k | \boldsymbol{\alpha}_k \sim \operatorname{Dir}\left(\boldsymbol{\alpha}_k\right)$ with $\boldsymbol{\alpha}_{k} = \boldsymbol{1} \in \mathbb{R}^{K}$ for an uninformative prior.\
In this way the posterior is again a Dirichlet: 
$P(\boldsymbol{\pi}_k | z_t, z_{t-1} = k) = \operatorname{Dir}\left(\boldsymbol{\alpha}_k + \boldsymbol{n}_k \right)$
where $\boldsymbol{n}_k = (n_{k,0}, \dots, n_{k,K-2}, n_{k,K-1})$ is the vector whose entry $n_{k,q}$ counts how many times a transition $k \rightarrow q$ is observed, for every $q$.

The whole procedure can be seen as a hybrid between *soft clustering* and *multivariate Bayesian regression*: given all the experimental data $(\boldsymbol{X},\boldsymbol{Y})$ we want to divide them into $K$ clusters, obtaining
$(\boldsymbol{X^{(k)}},\boldsymbol{Y^{(k)}})\; \forall k$. Each cluster is defined by two matrices $A_k$ and $Q_k$, and we want to perform a linear regression on each cluster to find these matrices. Finally, we also want the probability that the next point will be sampled from a given cluster, i.e. the transition matrix and the distribution of the variable $z_t$.

Similar to what happens in the *mixture of gaussians* the posterior on $z_t$ is:
$$
\operatorname{P}\left(z_t=k | z_{t-1},ALL\right)=\frac{r_{t k}}{\sum_{j} r_{t j}} \; \text{where} \;
r_{t k}=p_{z_{t-1},k}\left|Q_k\right|^{-1 / 2} \exp \left\{-\frac{1}{2}\left(\mathbf{X}_t-A_k \mathbf{X}_{t-1}\right)^T Q_k^{-1}\left(\mathbf{X}_t-A_k \mathbf{X}_{t-1}\right) \right\}
$$
In some sense it is a probability that takes into account the distance between the cluster labeled $k$ and the point $X_t$.

What remains to be written are the $K$ linear regressions (one for each cluster). It is worth noticing that the linear regressions are separate: $X^{(k)}$ does not interact with $X^{(k-1)}$. In other words, at this point the math of the problem says that we are going to perform linear regressions on $K$ systems of the form: $X_{s}^{(k)} = A_k X_{s - 1}^{(k)} + b_k$, where the time $s$ is redefined on the subset $X^{(k)}$ of $X$.
We want to perform the regressions in homogeneous coordinates, so the dynamics become $Y_{s}^{(k)} = A_k X_{s - 1}^{(k)}$, but now $Y_{s}^{(k)} \in \mathbb{R}^{M}$, $X_{s - 1}^{(k)} \in \mathbb{R}^{M + 1}$ and $A_{k} \in \mathbb{R}^{M \times (M + 1)}$, and its last column is the vector $b_k$.







In [24]:
# Build the regression pairs in homogeneous coordinates.

# Predictors: columns 0 .. T-2 of X_data (the "previous" state of each pair)
# with a row of ones appended underneath -> dimensions (M+1) x (T-1).
X = np.concatenate((X_data[:, :T - 1], np.ones((1, T - 1))), axis=0)

# Targets: columns 1 .. T-1 of X_data (the "next" state of each pair)
# -> dimensions M x (T-1).
Y = X_data[:, 1:T]

In [26]:
# Initialise the sampler state with a random assignment of discrete labels.

# One label per (X, Y) column pair, drawn uniformly from {0, ..., K-1}.
z_t = np.random.choice(K, size = T - 1, p = None)

# N_k is a K x K matrix whose rows are the vectors n_k: N_k[k, q] counts how
# many times label k is immediately followed by label q in z_t.
N_k = np.zeros((K,K))

# np.add.at is unbuffered, so repeated (k, q) index pairs accumulate correctly.
np.add.at(N_k, (z_t[:-1], z_t[1:]), 1)

# Split the data points by their sampled label, as dictionaries of 2-D
# matrices keyed by the label written as a string ('0', '1', ...).
X_k = {}
Y_k = {} # same columns as X_k, taken from Y (no homogeneous coordinate)
for k in range(K):

    # np.where returns a 1-tuple holding the index array. Index with the
    # array itself: indexing with the tuple would be read as a (1, n) index
    # array and produce a spurious length-1 axis, i.e. (rows, 1, n) arrays
    # instead of (rows, n) matrices.
    k_indices = np.where(z_t == k)
    X_k[str(k)] = X[:, k_indices[0]]
    Y_k[str(k)] = Y[:, k_indices[0]]

# TODO (as written in 'details'): define for each k the matrices M_k, L_k, P_k and the number nu_k

In [28]:
# Sanity check of the initial state, one item per line: number of labels,
# shapes of X and Y, the indices left over from the last loop iteration,
# and the targets stored under label '1'.
print(len(z_t), X.shape, Y.shape, k_indices, Y_k['1'], sep='\n')

999
(3, 999)
(2, 999)
(array([  2,  10,  13,  16,  20,  25,  27,  28,  30,  31,  34,  37,  43,
        45,  46,  49,  50,  55,  57,  63,  68,  69,  72,  75,  79,  88,
        91,  94,  95,  99, 105, 106, 108, 111, 113, 118, 123, 125, 133,
       136, 138, 139, 140, 143, 144, 145, 150, 152, 162, 166, 170, 177,
       180, 181, 183, 191, 193, 194, 198, 199, 200, 204, 209, 217, 218,
       222, 225, 226, 227, 234, 236, 237, 245, 246, 252, 253, 255, 260,
       261, 262, 263, 264, 267, 271, 273, 279, 280, 283, 284, 286, 287,
       290, 300, 301, 302, 303, 306, 309, 311, 314, 319, 320, 326, 327,
       328, 329, 330, 334, 335, 337, 339, 340, 341, 345, 349, 356, 357,
       358, 359, 363, 369, 370, 378, 384, 386, 387, 390, 393, 397, 401,
       403, 415, 419, 425, 427, 430, 432, 436, 439, 443, 444, 445, 446,
       454, 455, 458, 469, 471, 474, 479, 483, 487, 489, 491, 493, 495,
       496, 498, 499, 502, 516, 519, 522, 524, 525, 527, 528, 532, 534,
       535, 536, 537, 539, 540, 542, 543,