# Prepare custom dataset

In [18]:
from base import Normalize, ToFDA, TacDataset
from torchvision import transforms

transform = transforms.Compose([Normalize(axis=0), ToFDA(flatten=False)])
ds = TacDataset('../data', transform=transform)

# Plot raw data

In [24]:
import pandas as pd
from numpy.fft import fft
from matplotlib import pyplot as plt
from sklearn.manifold import TSNE

from skfda import FDataGrid
from skfda.representation import basis
import numpy as np

base = basis.Fourier(nbasis=33)

for i, (sample, label) in enumerate(ds):
    if i == 28:
        print(sample.shape)

TypeError: __init__() got an unexpected keyword argument 'nbasis'

In [27]:
basis.Fourier()

Fourier(domain_range=((0, 1),), n_basis=3, period=1)

# Compress data with Tucker decomposition
1. Compute covariance matrix for each multi-channel frequency series
2. Stack covariance matrix into a 3D covariance tensor $T \in \mathbb{R}^{C \times C \times N}$
3. Use core tensor $\mathcal{G}$ of the Tucker decomposition $T = \mathcal{G} \times_1 U_1 \times_2 U_2 \times_3 U_3$ as a compressed representation
4. Visualize the latent vectors

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D
from numpy.fft import fft
from skfda import FDataGrid
from skfda.representation import basis
from tensorly.decomposition import non_negative_tucker, tucker
from tensorly.tenalg import mode_dot

plt.rcParams['figure.dpi'] = 150
%matplotlib notebook

N_BASIS = 65
STRIDE = 128
WINDOW_SIZE = 256
RANK = 3

fd_basis = basis.Fourier([0, 2 * np.pi], n_basis=N_BASIS, period=1)

def compute_cov_fda(data):
    ''' Compute covariance matrix with functional basis decomposition.'''
    fd = FDataGrid(data.T).to_basis(fd_basis)
    coeffs = fd.coefficients.squeeze()
    return np.cov(coeffs[:, 1:].T)

cov_array = []
labels, params = [], []

for sample, label in ds:
    cov_array.append(compute_cov_fda(sample))
    labels.append(ds.get_texture_name(label[0]))
    params.append(labels[1:])
    
cov_tensor = np.transpose(np.asarray(cov_array), [1, 2, 0])
_, factors = non_negative_tucker(cov_tensor, rank=(RANK, RANK, cov_tensor.shape[2]))
covM = mode_dot(cov_tensor, factors[1].T, 1)
covM = mode_dot(covM, factors[0].T, 0)
covM = covM.reshape(RANK * RANK, len(labels)).T
M = (covM - np.min(covM))/(np.max(covM) - np.min(covM)) * 255

ModuleNotFoundError: No module named 'tensorly'

In [None]:
M3d = M[:, :3]

df0 = pd.DataFrame(labels, columns=["texture"])
df1 = pd.DataFrame(params, columns=["pressure", "speed"])
df2 = pd.DataFrame(M3d, columns=["x1", "x2", "x3"])
df  = pd.concat([df0, df1, df2], axis=1)

textures = df["texture"].unique()
pressures = df["pressure"].unique()
speeds = df["speed"].unique()
cmap = plt.cm.get_cmap("plasma", len(textures))

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')

for i, texture in enumerate(textures):
    X = df[df["texture"] == texture]
    x1, x2, x3 = X['x1'], X['x2'], X['x3'] 
    ax.scatter(x1, x2, x3, s=20, c=np.tile(cmap(i), (len(x1), 1)))
    ax.legend(textures)

plt.show()

In [None]:
from sklearn import neighbors, datasets

n_neighbors = 10
train_ratio = 0.7

y = np.array(labels)
index = np.arange(len(y), dtype=int)
np.random.shuffle(index)
train_id = index[:int(train_ratio * len(index))]

clf = neighbors.KNeighborsClassifier(n_neighbors, weights="distance")
clf.fit(M[train_id, :], y[train_id])
rate = np.sum(clf.predict(M) == y)/len(y)
print("Classification accuracy: {:.2f}%".format(rate * 100))

# Train the network

In [None]:
import matplotlib.pyplot as plt
import torch
from ml_base import RVAE
from torch import optim
from torch.utils.data import DataLoader

BATCH_SIZE = 2
EPOCHS = 100
INPUT_DIM = 1
NUM_CLASS = 4

device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

train_loader = DataLoader(ds, batch_size=BATCH_SIZE, collate_fn=PadSequence(), num_workers=6, shuffle=True)
rvae = RVAE(input_dim=INPUT_DIM, hidden_dim=16, encoding_dim=3, extra_dim=2, output_dim=NUM_CLASS, n_layers=3, device=device)
loss_list = []

def train_once(x, y, model, optimizer, criterion):
    optimizer.zero_grad()
    output = model(x)
    target = y.to(device)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
    
    return loss.item()
    
def train_model(data_loader, model):
    optimizer = optim.SGD(model.parameters(), lr = 1e-3)
    criterion = nn.CrossEntropyLoss()
    # loss_list = []
    
    for epoch in range(EPOCHS):
        running_loss = 0.0
        
        for i, (batch, lengths, params, targets) in enumerate(data_loader):
            loss = train_once((batch, lengths, params), targets, model, optimizer, criterion)
            running_loss += loss
            loss_list.append(loss)
            
            if i % 10 == 9:
                print('Epoch {}, {:.2f}% - loss: {:.6f}'.format(epoch + 1, 100.0 * (i + 1.0) / len(data_loader), running_loss / 10))
                running_loss = 0.0
                
    print("Training finished.")
    plt.figure()
    plt.plot(loss_list)
    plt.title("Training Recurrent Autoencoder")
    plt.xlabel("Run")
    plt.ylabel("Loss")
    plt.show()
    
train_model(train_loader, rvae)