Here we analyse the windowed correlations across the different files at the binary level.

In [None]:
import numpy as np
import pandas as pd
from glob import glob
%matplotlib inline

In [None]:
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib.patches import Rectangle
import scipy

In [None]:
def plot_slice(df, s):
    """Plot the columns of ``df`` selected by ``s`` in two stacked panels.

    The top panel shows the selected columns as they are. The bottom panel
    shows the same columns multiplied by the number of rows and divided by
    the 1-based row position. Both panels share the x axis and are drawn
    without a legend.

    Args:
        df: DataFrame whose columns are plotted.
        s: Positional column selector handed to ``df.iloc[:, s]``
            (the notebook passes ``slice`` objects).
    """
    fig, (top, bottom) = plt.subplots(2, figsize=(9, 10), sharex=True)
    n_rows = df.index.size
    chosen = df.iloc[:, s]
    chosen.plot(legend=False, ax=top)
    # 1, 2, ..., n_rows: the divisor applied row by row in the lower panel.
    row_position = np.arange(1, n_rows + 1)
    rescaled = chosen.multiply(n_rows).divide(row_position, axis=0)
    rescaled.plot(legend=False, ax=bottom)
    plt.tight_layout()

# Residual

In [None]:
# Read the residual CSV: the first line of the file is skipped, the first
# column becomes the index, and every remaining value is cast to float.
source = "../emac.ml.tm1.f32.little.5x90x160x320_3.raw.residual.csv"
df = pd.read_csv(source, skiprows=1, index_col=0)
df = df.astype(float)
df.head()

In [None]:
# Plot every column of the table loaded above (an open slice selects all).
plot_slice(df, slice(None))

In [None]:
# True when exactly as many correlation entries exceed 0.9 as there are
# columns (presumably just the diagonal self-correlations -- confirm).
# 0.9 is the value the original note refers to: "the threshold was set to .9".
df.corr().gt(.9).sum().sum() == len(df.columns)

In [None]:
# Square-cell heatmap of the column-by-column correlation matrix, with the
# colour bar shrunk to half height.
fig, ax = plt.subplots(figsize=(15, 15))
sns.heatmap(
    df.corr(),
    ax=ax,
    square=True,
    cbar_kws=dict(shrink=0.5),
);

In [None]:
# Correlation heatmap again, this time with blue square outlines drawn over
# four blocks on the diagonal (positions 0-26, 27-40, 41-51 and 52-58).
fig, ax = plt.subplots(figsize=(15, 15))
sns.heatmap(
    df.corr(),
    ax=ax,
    square=True,
    cbar_kws=dict(shrink=0.5),
);

ax.add_patch(Rectangle(xy=(0, 0), width=27, height=27, fill=False, edgecolor="blue", lw=3));
ax.add_patch(Rectangle(xy=(27, 27), width=14, height=14, fill=False, edgecolor="blue", lw=3));
ax.add_patch(Rectangle(xy=(41, 41), width=11, height=11, fill=False, edgecolor="blue", lw=3));
ax.add_patch(Rectangle(xy=(52, 52), width=7, height=7, fill=False, edgecolor="blue", lw=3));

In [None]:
# Hierarchically clustered view of the correlation matrix (seaborn defaults).
sns.clustermap(data=df.corr(), figsize=(15, 15));

How can algorithms see this pattern?

In [None]:
# Columns at positions 0-26: the first block outlined on the annotated heatmap.
plot_slice(df, slice(None, 27))

In [None]:
# Columns at positions 27-41.
# NOTE(review): the matching outline on the annotated heatmap starts at 27 with
# size 14, i.e. positions 27-40; this slice also takes column 41 -- confirm
# which boundary is intended.
plot_slice(df, slice(27, 42))

In [None]:
# Columns at positions 42-51.
# NOTE(review): the matching outline on the annotated heatmap starts at 41 with
# size 11, i.e. positions 41-51; this slice leaves column 41 out -- confirm
# which boundary is intended.
plot_slice(df, slice(42, 52))

In [None]:
# Columns from position 52 to the end: the last outlined block.
plot_slice(df, slice(52, None))

# NoLZC

In [None]:
# Read the NoLZC CSV: the first line of the file is skipped, the first
# column becomes the index, and every remaining value is cast to float.
# This rebinds `source` and `df` for the cells that follow.
source = "../emac.ml.tm1.f32.little.5x90x160x320_3.raw.residual.nlzc.32.95.csv"
df = pd.read_csv(source, skiprows=1, index_col=0)
df = df.astype(float)
df.head()

In [None]:
# Plot every column of the NoLZC table (an open slice selects all).
plot_slice(df, slice(None))

In [None]:
# Square-cell heatmap of the column-by-column correlation matrix, with the
# colour bar shrunk to half height.
fig, ax = plt.subplots(figsize=(15, 15))
sns.heatmap(
    df.corr(),
    ax=ax,
    square=True,
    cbar_kws=dict(shrink=0.5),
);

In [None]:
# Correlation heatmap with blue square outlines over two blocks on the
# diagonal (positions 0-40 and 41-49).
fig, ax = plt.subplots(figsize=(15, 15))
sns.heatmap(
    df.corr(),
    ax=ax,
    square=True,
    cbar_kws=dict(shrink=0.5),
);

ax.add_patch(Rectangle(xy=(0, 0), width=41, height=41, fill=False, edgecolor="blue", lw=3));
ax.add_patch(Rectangle(xy=(41, 41), width=9, height=9, fill=False, edgecolor="blue", lw=3));

In [None]:
# Hierarchically clustered view of the correlation matrix (seaborn defaults).
sns.clustermap(data=df.corr(), figsize=(15, 15));

In [None]:
# Columns at positions 0-40: the first block outlined on the annotated heatmap.
plot_slice(df, slice(None, 41))

In [None]:
# Columns from position 41 to the end: the second outlined block.
plot_slice(df, slice(41, None))

# Bplanes

In [None]:
# Read the Bplanes CSV: the first line of the file is skipped, the first
# column becomes the index, and every remaining value is cast to float.
# This rebinds `source` and `df` for the cells that follow.
source = "../emac.ml.tm1.f32.little.5x90x160x320_3.raw.residual.bplanes.32.csv"
df = pd.read_csv(source, skiprows=1, index_col=0)
df = df.astype(float)
df.head()

In [None]:
# Plot every column of the Bplanes table (an open slice selects all).
plot_slice(df, slice(None))

In [None]:
# Square-cell heatmap of the column-by-column correlation matrix, with the
# colour bar shrunk to half height.
fig, ax = plt.subplots(figsize=(15, 15))
sns.heatmap(
    df.corr(),
    ax=ax,
    square=True,
    cbar_kws=dict(shrink=0.5),
);

In [None]:
# Correlation heatmap with blue square outlines over four blocks on the
# diagonal (positions 0-4, 5-8, 9-12 and 13-32).
fig, ax = plt.subplots(figsize=(15, 15))
sns.heatmap(
    df.corr(),
    ax=ax,
    square=True,
    cbar_kws=dict(shrink=0.5),
);

ax.add_patch(Rectangle(xy=(0, 0), width=5, height=5, fill=False, edgecolor="blue", lw=3));
ax.add_patch(Rectangle(xy=(5, 5), width=4, height=4, fill=False, edgecolor="blue", lw=3));
ax.add_patch(Rectangle(xy=(9, 9), width=4, height=4, fill=False, edgecolor="blue", lw=3));
ax.add_patch(Rectangle(xy=(13, 13), width=20, height=20, fill=False, edgecolor="blue", lw=3));

In [None]:
# Hierarchically clustered view of the correlation matrix (seaborn defaults).
sns.clustermap(data=df.corr(), figsize=(15, 15));

In [None]:
# Columns at positions 0-5.
# NOTE(review): the matching outline on the annotated heatmap starts at 0 with
# size 5, i.e. positions 0-4; this slice also takes column 5 -- confirm which
# boundary is intended.
plot_slice(df, slice(None, 6))

In [None]:
# Columns at positions 6-8.
# NOTE(review): the matching outline on the annotated heatmap starts at 5 with
# size 4, i.e. positions 5-8; this slice leaves column 5 out -- confirm which
# boundary is intended.
plot_slice(df, slice(6, 9))

In [None]:
# Columns at positions 9-12: the third block outlined on the annotated heatmap.
plot_slice(df, slice(9, 13))

In [None]:
# Columns from position 13 to the end: the last outlined block.
plot_slice(df, slice(13, None))

In [None]:
# The clustered correlation view once more, as a reference for the
# hierarchical-clustering cells below.
sns.clustermap(data=df.corr(), figsize=(15, 15));

In [None]:
from functools import namedtuple

In [None]:
# Load the submodule explicitly: depending on the SciPy version, a bare
# `import scipy` only exposes `scipy.cluster.hierarchy` when some other
# package happens to have imported it already.
import scipy.cluster.hierarchy

# Linkage settings, kept in variables so other cells can reuse them.
method, metric = "average", "euclidean"
# Hierarchical clustering over the rows of the correlation matrix; `link` has
# one row per merge and is what get_members_of_lvl consumes.
link = scipy.cluster.hierarchy.linkage(df.corr(), method=method, metric=metric)

In [None]:
def get_members_of_lvl(lvl, link):
    """Print the member lists of the clusters found at depth ``lvl``.

    The merge tree is rebuilt from ``link``. Every leaf starts as a
    single-member cluster at level 0. For each row of ``link`` the two
    referenced clusters are both set to the larger of their two levels and
    the merged cluster is placed one level above. Levels are then flipped so
    that the final (root) cluster is level 0 and larger numbers are deeper.

    All clusters at depth ``lvl`` are selected. Then, for every shallower
    depth ``lvl - 1`` down to 1, the first cluster at that depth which does
    not contain the members of the first selected cluster is added. The
    member list of each selected cluster is printed, one per line.

    Args:
        lvl: Depth to cut at; 0 is the root.
        link: Linkage matrix with ``shape[0]`` merge rows whose first two
            columns are the indices of the merged clusters (the layout
            returned by ``scipy.cluster.hierarchy.linkage``).

    Returns:
        None. The result is printed only.

    Raises:
        IndexError: If ``lvl`` is deeper than the deepest level of the tree.
    """
    # Imported from its documented home so the function does not depend on
    # `namedtuple` being reachable through some other module.
    from collections import namedtuple

    Level = namedtuple("Level", "members,lvl")
    n_leaves = link.shape[0] + 1
    clusters = [Level(members=[leaf], lvl=0) for leaf in range(n_leaves)]

    for row in link:
        left, right = int(row[0]), int(row[1])
        height = max(clusters[left].lvl, clusters[right].lvl)
        # Lift the shallower child so both siblings sit on the same level.
        clusters[left] = clusters[left]._replace(lvl=height)
        clusters[right] = clusters[right]._replace(lvl=height)
        merged = sorted(clusters[left].members + clusters[right].members)
        clusters.append(Level(members=merged, lvl=height + 1))

    maximum_lvl = clusters[-1].lvl
    if lvl > maximum_lvl:
        # Previously this surfaced as an opaque "list index out of range".
        raise IndexError(
            f"no clusters at level {lvl}: the deepest level is {maximum_lvl}"
        )

    # Flip the levels: root becomes 0, leaves on the longest path become deepest.
    clusters = [Level(c.members, maximum_lvl - c.lvl) for c in clusters]

    selection = [c for c in clusters if c.lvl == lvl]
    # Members of the first selected cluster; clusters containing all of them
    # are skipped while walking back up towards the root.
    anchor = set(selection[0].members) if selection else set()
    for depth in range(lvl - 1, 0, -1):
        outsiders = [
            c for c in clusters
            if c.lvl == depth and not anchor.issubset(c.members)
        ]
        selection.append(outsiders[0])

    for chosen in selection:
        print(chosen.members)

In [None]:
# Print the cluster member lists obtained by cutting the merge tree at depth 3.
get_members_of_lvl(lvl=3, link=link)

In [None]:
# Reuse the `method` / `metric` chosen for the linkage cell so this dendrogram
# is always built with the same settings as `link`.
sns.clustermap(df.corr(), figsize=(15, 15), method=method, metric=metric);

Now we can algorithmically define/discover clusters of probability distributions. After this, we need to merge the probability distributions in each cluster into a single representation and, from these, define a probability table that includes the representations and their specific group members.