In [1]:
from kuberspatiotemporal.kuber import KuberModel
from kuberspatiotemporal.kuberspatiotemporal import KuberspatiotemporalModel, Feature

In [2]:
from scipy.stats import multinomial
from numpy.random import dirichlet
import numpy as np
import pandas as pd
import logging
# Configure root logging once for the notebook: emit DEBUG and above, with each
# record prefixed by its level, logger name, function name and line number.
logging.basicConfig(format='[%(levelname)s:%(name)s:%(funcName)s:%(lineno)d] %(message)s',level=logging.DEBUG)


In [3]:
def categorical_mixture(n_draws: int, random_state=None) -> np.ndarray:
    """Sample from a randomly parameterised two-component categorical mixture.

    The mixture itself is random: the component weights ``pi`` are drawn from
    ``Dirichlet([1, 1])`` and each of the four (feature, component) symbol
    distributions over three symbols from ``Dirichlet([1, 1, 1])``. The drawn
    parameters are printed so they can be compared with a fitted model.

    Parameters
    ----------
    n_draws : int
        Number of samples to generate.
    random_state : int, numpy.random.RandomState or None, optional
        Seed (or generator) that makes the output reproducible. ``None``
        (the default) keeps the previous behaviour of drawing from NumPy's
        global random state.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(n_draws, 2)``. Column 0 holds feature 1,
        column 1 holds feature 2; every entry is a symbol index in
        ``{0, 1, 2}``.
    """
    if random_state is None:
        # Legacy path: module-level ``dirichlet`` and scipy's default both
        # consume NumPy's global random state.
        rng = None
        draw_dirichlet = dirichlet
    else:
        if isinstance(random_state, np.random.RandomState):
            rng = random_state
        else:
            rng = np.random.RandomState(random_state)
        draw_dirichlet = rng.dirichlet

    # pmXY: symbol distribution of feature X under component Y.
    pi = draw_dirichlet([1, 1], 1)[0]
    pm11, pm12, pm21, pm22 = draw_dirichlet([1, 1, 1], 4)
    print("pi: ", pi)
    print('Component1: ',pm11,pm21)
    print('Component2: ',pm12,pm22)

    def draw_indices(pmf):
        # rvs yields one-hot rows; argmax turns each row into its symbol index.
        one_hot = multinomial(1, pmf).rvs(size=n_draws, random_state=rng)
        return one_hot.argmax(axis=1)

    # Draw order (component, then feature 1, then feature 2) matches the
    # original implementation so a seeded global state gives the same stream.
    component = draw_indices(pi)
    # Candidate symbols for every draw under each component (one column each).
    feat1 = np.column_stack([draw_indices(pm11), draw_indices(pm12)])
    feat2 = np.column_stack([draw_indices(pm21), draw_indices(pm22)])

    # Keep, per draw, the candidate belonging to the component that was chosen.
    rows = np.arange(n_draws)
    return np.column_stack([feat1[rows, component], feat2[rows, component]])

# Optional tabular preview of a small sample (disabled):
# df = pd.DataFrame(categorical_mixture(10), columns=['feat1','feat2'])
# display(df)
# Training data: 1000 draws, one row per draw and one column per feature.
# The call also prints the randomly drawn mixture parameters.
X = categorical_mixture(1000)
# display(X)


pi:  [0.69100421 0.30899579]
Component1:  [0.10600962 0.42636309 0.46762729] [0.3392473  0.43984323 0.22090947]
Component2:  [0.05631974 0.6770874  0.26659286] [0.4070272  0.21619229 0.37678051]


In [16]:
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import make_pipeline

# The notebook-wide log level is DEBUG; restrict the library's own loggers to
# INFO and above before fitting.
for logger_name in (
    'kuberspatiotemporal.base',
    'kuberspatiotemporal.kuber',
    'kuberspatiotemporal.kuberspatiotemporal',
):
    logging.getLogger(logger_name).setLevel(logging.INFO)

# Joint model over the two columns of X. Each column gets its own, identically
# configured KuberModel (3 symbols, 100 components); the trailing list is
# presumably the column index that sub-model reads -- TODO confirm against
# the Feature definition.
kst = KuberspatiotemporalModel(
    n_components=100,
    n_dim=2,
    scaling_parameter=1.1,
    nonparametric=True,
    features=[
        Feature(KuberModel(n_symbols=3, nonparametric=True, n_components=100), [column])
        for column in (0, 1)
    ],
)

# Disabled debugging aids, kept for reference:
# print(kst._weights)
# for i in range(50):
#     print(i, kst.features[0].model._KuberModel__pmf)
#     kst.fit(X[:,0].reshape(-1,1), n_iterations=1)

# Batch fit on the full sample.
kst.fit(X, n_iterations=1000)
    


[INFO:kuberspatiotemporal.base:fit:256] Learning 1000 samples (Batch)
[INFO:kuberspatiotemporal.base:fit:260] Step 0/1000
[INFO:kuberspatiotemporal.base:fit:260] Step 10/1000
[INFO:kuberspatiotemporal.base:fit:260] Step 20/1000
[INFO:kuberspatiotemporal.base:fit:260] Step 30/1000
[INFO:kuberspatiotemporal.base:fit:260] Step 40/1000
[INFO:kuberspatiotemporal.base:fit:260] Step 50/1000
[INFO:kuberspatiotemporal.base:fit:260] Step 60/1000
[INFO:kuberspatiotemporal.base:fit:260] Step 70/1000
[INFO:kuberspatiotemporal.base:fit:260] Step 80/1000
[INFO:kuberspatiotemporal.base:fit:260] Step 90/1000
[INFO:kuberspatiotemporal.base:fit:260] Step 100/1000
[INFO:kuberspatiotemporal.base:fit:260] Step 110/1000
[INFO:kuberspatiotemporal.base:fit:260] Step 120/1000
[INFO:kuberspatiotemporal.base:fit:260] Step 130/1000
[INFO:kuberspatiotemporal.base:fit:260] Step 140/1000
[INFO:kuberspatiotemporal.base:fit:260] Step 150/1000
[INFO:kuberspatiotemporal.base:fit:260] Step 160/1000
[INFO:kuberspatiotempor

In [17]:
# Print floats with three decimals and without scientific notation.
np.set_printoptions(suppress=True, precision=3)

# Top-level mixture weights in ascending order.
display(np.sort(kst._weights))

# Component indices ordered by the first feature model's weights (ascending),
# shown together with the index of its largest weight.
idx = np.argsort(kst.features[0].model._weights)
display(idx, np.argmax(kst.features[0].model._weights))

# Symbol probabilities of the two highest-weighted components, for each
# feature in turn. `_KuberModel__pmf` is the name-mangled private `__pmf`.
for feature_no in (0, 1):
    display(kst.features[feature_no].model._KuberModel__pmf[idx[-2:]])


array([0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
       0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
       0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
       0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
       0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
       0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
       0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
       0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
       0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
       0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
       0.   , 0.002, 0.056, 0.075, 0.076, 0.094, 0.12 , 0.147, 0.173,
       0.255])

array([95, 97, 98, 93, 96, 94, 92, 91, 90, 89, 88, 86, 87, 83, 85, 84, 82,
       81, 79, 78, 80, 75, 74, 77, 76, 72, 73, 71, 70, 68, 67, 69, 66, 65,
       63, 64, 62, 61, 58, 60, 57, 59, 55, 52, 56, 51, 54, 53, 50, 49, 47,
       48, 44, 46, 43, 45, 41, 40, 42, 39, 38, 35, 37, 34, 36, 33, 31, 32,
       30, 29, 27, 28, 25, 22, 26, 24, 23, 21, 19, 20, 18, 15, 17, 16, 12,
       13, 11,  8, 14, 10,  9,  7,  6,  4,  1, 99,  5,  3,  0,  2])

2

array([[0.149, 0.766, 0.085],
       [0.089, 0.298, 0.614]])

array([[0.137, 0.391, 0.472],
       [0.454, 0.435, 0.111]])

Nicht katastrophal … am besten mal Metriken berechnen!

In [21]:
# Full probability table held by the first feature's model (name-mangled
# private attribute `__pmf`); presumably one row per component and one column
# per symbol -- TODO confirm against KuberModel.
display(kst.features[0].model._KuberModel__pmf)
# Mixture weights in their stored (unsorted) order; as the cell's last
# expression they are echoed as the cell output.
kst._weights



array([[0.149, 0.766, 0.085],
       [0.029, 0.624, 0.347],
       [0.089, 0.298, 0.614],
       [0.087, 0.521, 0.392],
       [0.04 , 0.755, 0.205],
       [0.22 , 0.017, 0.763],
       [0.011, 0.416, 0.574],
       [0.22 , 0.002, 0.778],
       [0.417, 0.235, 0.348],
       [0.021, 0.382, 0.597],
       [0.002, 0.639, 0.359],
       [0.049, 0.154, 0.797],
       [0.002, 0.025, 0.973],
       [0.007, 0.134, 0.858],
       [0.006, 0.255, 0.738],
       [0.53 , 0.393, 0.077],
       [0.003, 0.243, 0.753],
       [0.046, 0.388, 0.566],
       [0.892, 0.048, 0.06 ],
       [0.064, 0.311, 0.625],
       [0.01 , 0.606, 0.384],
       [0.042, 0.532, 0.426],
       [0.013, 0.625, 0.362],
       [0.042, 0.005, 0.953],
       [0.801, 0.198, 0.001],
       [0.002, 0.89 , 0.108],
       [0.007, 0.404, 0.589],
       [0.357, 0.105, 0.539],
       [0.008, 0.329, 0.663],
       [0.07 , 0.094, 0.836],
       [0.002, 0.633, 0.364],
       [0.014, 0.047, 0.938],
       [0.002, 0.504, 0.494],
       [0.

array([0.173, 0.076, 0.255, 0.147, 0.075, 0.12 , 0.056, 0.002, 0.   ,
       0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
       0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
       0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
       0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
       0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
       0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
       0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
       0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
       0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
       0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
       0.094])

$$\begin{array}{lcl}
\text{Responsibilities} &=& \left(\frac{\pi_i P(y_t|x_t=i,\Phi)}{\sum_{j\in I} \pi_j P(y_t|x_t=j,\Phi)}\right)_{i \in I, t\in T}\\
\text{Log_prob} &=& \left( \log \left(\sum_{i \in I} \pi_i P(y_t|x_t=i,\Phi) \right)\right)_{t \in T}\\
\text{mean_log_prob a.k.a. score} &=& \frac{1}{|T|} \sum_{t \in T} \left( \log \left(\sum_{i \in I} \pi_i P(y_t|x_t=i,\Phi)\right) \right)
\end{array}$$