# Using NTD to extract repetitive patterns and their activations

In [2]:
import numpy as np
from shootout.methods.runners import run_and_track
from shootout.methods.post_processors import df_to_convergence_df
from mu_ntd.algorithms.Sparse_ntd import sntd_mu
import pandas as pd
import plotly.express as px


The goal here is to compare the extracted Q matrices obtained with and without sparsity, and also to check which optimization method is fastest (with or without extrapolation, i.e. HER vs. no HER).
We can play with the third dimension to show that sparsity helps fix that dimension.

In [3]:
# Load the NNlogMel spectrogram tensor provided by Axel.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects,
# so this file must come from a trusted source.
tensor_spectrogram = np.load(
    "../Data/RWC1/1_nn_log_mel_grill_hop32_subdiv96.npy",
    allow_pickle=True,
)


In [3]:
# Running NTD on the spectrogram tensor
# NOTE(review): the name `vars` shadows the Python builtin `vars` from here on.
vars = {
    "beta": [1],
    # Signed regularization weights: a negative entry requests a sparse (l1)
    # penalty of that magnitude, a non-negative entry an l2 penalty.
    "weights": [[0, 0, 0, 0], [-2e2, 1, 1, -2e2]],
}


@run_and_track(
    **vars,
    name_store="run-testaudio",
    path_store="Results/",
    nb_seeds=3,
    algorithm_names=["MU no her"],
    extrapolate=False,
)
def run(ranks=[10,10,10], n_iter_max=100, beta=1, extrapolate=False, weights=[0,0,0,0]):
    """Run one sparse NTD (multiplicative updates) on `tensor_spectrogram`.

    Parameters
    ----------
    ranks : list of int
        Ranks forwarded to `sntd_mu`.
    n_iter_max : int
        Maximum number of iterations forwarded to `sntd_mu`.
    beta : number
        Beta parameter forwarded to `sntd_mu`.
    extrapolate : bool
        Forwarded to `sntd_mu`. The tracked experiment above passes False,
        matching the "MU no her" algorithm name.
    weights : list of numbers
        Signed regularization weights. Each negative entry `w` becomes an l1
        weight `-w` (with l2 weight 0); each non-negative entry becomes an l2
        weight `w` (with l1 weight 0).
        Presumably one entry per factor plus one for the core -- TODO confirm
        against `sntd_mu`.

    Returns
    -------
    dict
        Keys "errors", "timings", "factors" and "core", holding the
        corresponding values returned by `sntd_mu`.

    Notes
    -----
    TODO: bug when l2weights are not provided (None).
    """
    # Split the signed weights into two same-length lists, one per penalty type.
    l1weights = [-w if w < 0 else 0 for w in weights]
    l2weights = [0 if w < 0 else w for w in weights]

    # calling NTD
    # NOTE(review): normalize=None seems to trigger the "Irrelevant number of
    # normalization booleans" message seen in this cell's output.
    core, factors, errors, timings, _ = sntd_mu(
        tensor_spectrogram,
        ranks=ranks,
        l1weights=l1weights,
        l2weights=l2weights,
        n_iter_max=n_iter_max,
        tol=0,
        beta=beta,
        return_costs=True,
        # Honor the caller's choice instead of silently forcing False.
        extrapolate=extrapolate,
        normalize=None,
    )
    return {"errors": errors, "timings": timings, "factors": factors, "core": core}




Converting single parameter swipe to singleton
Params values are currently: (1, [0, 0, 0, 0], False)
Irrelevant number of normalization booleans (different from the number of modes + 1 for the core), they have been set to False.


100%|██████████| 100/100 [01:36<00:00,  1.03it/s]


Irrelevant number of normalization booleans (different from the number of modes + 1 for the core), they have been set to False.


100%|██████████| 100/100 [01:36<00:00,  1.04it/s]


Irrelevant number of normalization booleans (different from the number of modes + 1 for the core), they have been set to False.


100%|██████████| 100/100 [01:44<00:00,  1.04s/it]


Params values are currently: (1, [-200.0, 1, 1, -200.0], False)
Irrelevant number of normalization booleans (different from the number of modes + 1 for the core), they have been set to False.


100%|██████████| 100/100 [01:46<00:00,  1.07s/it]


Irrelevant number of normalization booleans (different from the number of modes + 1 for the core), they have been set to False.


100%|██████████| 100/100 [01:38<00:00,  1.02it/s]


Irrelevant number of normalization booleans (different from the number of modes + 1 for the core), they have been set to False.


100%|██████████| 100/100 [01:33<00:00,  1.07it/s]


In [4]:
# Reload the stored results table from disk.
# NOTE(review): this looks like the file written by the tracked run above
# (path_store="Results/", name_store="run-testaudio") -- confirm.
# Unpickling executes arbitrary code, so only read files you produced.
df = pd.read_pickle(filepath_or_buffer="Results/run-testaudio")

In [9]:
# Convergence plots: build the convergence dataframe from the stored runs.
# Only the keys of this dict are used below, so the value given for
# "weights_0" does not matter.
vars = {"beta": [1], "weights_0": []}
df_conv = df_to_convergence_df(
    df,
    other_names=list(vars),
    groups=True,
    groups_names=list(vars),
)

In [10]:
# Errors against the "it" column on a log y-axis: one line per group,
# colored by "weights_0", one facet per value of "beta".
fig2 = px.line(
    df_conv,
    x="it",
    y="errors",
    color="weights_0",
    log_y=True,
    line_group="groups",
    facet_col="beta",
)
fig2.show()

In [21]:
# Looking at factors
import numpy as np

# Plotting helper, in the style of Axel's script
def reorder_rows(Q):
    """Return Q with its rows permuted so that early-peaking rows come first.

    Columns are scanned left to right; the row holding the maximum of each
    column is placed next in the output, unless it was already placed.
    Rows that never hold a column maximum are appended afterwards, sorted by
    decreasing row sum.

    Parameters
    ----------
    Q : 2-D numpy array

    Returns
    -------
    numpy array with the same shape as Q, rows permuted.
    """
    n_rows, n_cols = Q.shape
    picked = []
    for col in range(n_cols):
        peak = np.argmax(Q[:, col])
        if peak not in picked:
            picked.append(peak)
            # Every row has been placed: no need to scan further columns.
            if len(picked) == n_rows:
                break

    if len(picked) < n_rows:
        # Rows that are never a column maximum go last, strongest first.
        leftovers = list(set(range(n_rows)) - set(picked))
        row_sums = np.sum(Q[leftovers, :], axis=1)
        by_decreasing_sum = np.argsort(row_sums)[::-1]
        picked = picked + [leftovers[k] for k in by_decreasing_sum]

    return Q[picked, :]

# Show the normalized, row-reordered transpose of factor 2 for the first six stored runs.
for i in range(6):
    fac = df["factors"][i]
    # Nonstandard normalization by rows of Q: the row sums of fac[2] divide the
    # transposed factor through broadcasting (assumes fac[2] is a 2-D array).
    row_sums = np.sum(fac[2], axis=1)
    Qt = reorder_rows(fac[2].T / row_sums)
    fig = px.imshow(Qt)
    fig.show()