In [62]:
import numpy as np
from zeros import *

def OstarO(A, n=7):
    """Return ``A.T @ A`` with every entry rounded to ``n`` decimal places.

    Parameters
    ----------
    A : array-like supporting ``.T`` and ``@``
        The matrix whose Gram product is formed. Note that ``.T`` is a plain
        transpose (no complex conjugation).
    n : int, default 7
        Number of decimals passed to ``round``.

    Returns
    -------
    The rounded product ``A.T @ A``.
    """
    gram = A.T @ A
    return gram.round(n)

In [63]:
m = 5  # Number of measurements
k = 2  # Rank of O^*O

## Diagonalized prior: 100 on the diagonal, with the first three entries overridden
prior = 100 * np.identity(m)
prior[[0, 1, 2], [0, 1, 2]] = [0.1, 0.2, 4]

## The posterior precision at the first k eigenvectors
precision = m / k + np.diag(prior)[:k].mean()

# M = O^*O: diagonal, nonzero only in its first k diagonal entries
M = np.zeros((m, m))
M[np.arange(k), np.arange(k)] = precision - np.diag(prior)[:k]

## Find O such that O^*O = M and O has unit norm rows
O = get_A(M, m).T
OstarO(O)

## Since O[2,:] = -O[4,:] we have clusterization

array([[2.55, 0.  , 0.  , 0.  , 0.  ],
       [0.  , 2.45, 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  ]])

In [59]:
## Clustered
m, k = 5, 2

# Block written into the top two rows of O
o1o2 = get_A(np.diag([0.55, 1.45]), k).T

O = np.zeros((m, k))
O[:2, :2] = o1o2
# Row 2 is (0, 1); rows 3 and 4 are both (1, 0), i.e. two identical rows
O[[2, 3, 4], [1, 0, 0]] = 1

# Returns O^*O
OstarO(O)

array([[ 2.55, -0.  ],
       [-0.  ,  2.45]])

In [64]:
## Not Clustered
m, k = 5, 2

# Two blocks, each written into a pair of rows of O
o1o2 = get_A(np.diag([0.75, 1.25]), k).T
o3o4 = get_A(np.diag([0.8, 1.2]), k).T

O = np.zeros((m, k))
O[0:2, 0:2] = o1o2
O[2:4, 0:2] = o3o4
O[4] = (1, 0)  # last row is (1, 0)

# Returns O^*O
print(OstarO(O))

# Looks nicer: explicit entries, written as a sign pattern times square roots.
# Squared entries sum to 2.55 in column 0 and 2.45 in column 1.
O = np.array([[1, -1], [1, 1], [1, -1], [1, 1], [1, 1]]) * np.sqrt(
    [
        [3/8, 5/8],
        [3/8, 5/8],
        [2/5, 3/5],
        [2/5, 3/5],
        [1, 0],
    ]
)

# Returns O^*O
print(OstarO(O))

[[2.55 0.  ]
 [0.   2.45]]
[[2.55 0.  ]
 [0.   2.45]]


In [79]:
# Numerical simulations (takes < 1 minute on my laptop)
from scipy.spatial.distance import cdist
import pandas as pd
from joblib import Parallel, delayed


def gen():
    """Yield every ``(m, k)`` pair to simulate.

    ``m`` runs from 20 down to 7; for each ``m``, ``k`` runs from 3 up to
    ``m - 2`` inclusive.
    """
    yield from ((m, k) for m in range(20, 6, -1) for k in range(3, m - 1))

def f(m, k, N):
    """Estimate the fraction of random designs that exhibit clusterization.

    For each of ``N`` trials a random instance is drawn with ``MDUS(m, k)``,
    ``O = get_A(D, m).T`` is built from it, and the trial counts as
    *non*-clustered when every pair of distinct rows of ``O`` is more than
    1e-9 apart (distance as computed by ``cdist``).

    Parameters
    ----------
    m, k : passed through to ``MDUS`` / ``get_A`` and echoed in the result.
    N : int
        Number of random trials.

    Returns
    -------
    dict with keys ``m``, ``k`` and ``cluster`` (fraction of trials that were
    clustered, i.e. ``1 - non_clustered / N``).
    """
    def all_rows_distinct():
        # Random M. M = UDU^*, S = sqrt(D), see zeros.py (only D is used here)
        _, D, _, _ = MDUS(m, k)
        O = get_A(D, m).T  # Lemma A.1 from paper
        distances = cdist(O, O)  # Distance matrix between rows of O
        # The diagonal is ~ 0 (each row against itself) and must not count,
        # so it is forced to pass the test.
        # NOTE(review): this detects coinciding rows only; if rows that differ
        # by a sign should also count as clustered, they are missed here —
        # confirm against the intended definition.
        far_apart = (distances > 1e-9) | np.eye(len(distances), dtype=bool)
        return far_apart.all()

    # Number of *non*-clusterization designs among the N trials
    count_non_clusterization = sum(all_rows_distinct() for _ in range(N))
    return dict(m=m, k=k, cluster=1 - count_non_clusterization / N)

# One task per (m, k) pair, 300 random designs each, one result row per task
res = pd.DataFrame(
    Parallel(n_jobs=-1)(delayed(f)(m, k, N=300) for m, k in gen())
)

## Column "cluster" is the fraction of designs exhibiting clusterization
res = res.assign(mk=res["m"] - res["k"])
res["cluster"].min()

0.9433333333333334