# 2.1. DTW-Based distance matrix

Calculates the matrix of pairwise DTW distances between the processed signals. Change `DTW_SAVE_PATH_3CLASS` in the `src.knn_load_data` file to adjust the output path.

In [1]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm
from time import perf_counter

import sys 
sys.path.append('..')
from src.constants import SAMPLES_DICT_3CLASS
from src.knn_load_data import (
    INDICES_SAVE_PATH_3CLASS, DTW_SAVE_PATH_3CLASS,
    load_data, prefix_data_dir
)
from src.dtw import dist_matrix_parallel
from src.signal_processing import SignalProcessor

In [2]:
# Directory that gets prefixed onto the entries of the samples dictionary.
# Edit this one constant when the data lives somewhere else.
PORE_DATA_DIR = "/mmfs1/gscratch/ml4ml/cailinw/pore_data/"

# NOTE(review): the result is bound back to the imported name, so re-running
# this cell feeds an already-prefixed dict into prefix_data_dir again.
# Presumably that double-prefixes the paths -- restart the kernel before
# re-running (confirm against src.knn_load_data).
SAMPLES_DICT_3CLASS = prefix_data_dir(SAMPLES_DICT_3CLASS, PORE_DATA_DIR)

### Load and process data

In [3]:
# Options forwarded to load_data; the raw signals are requested explicitly.
load_options = dict(
    samples_dict=SAMPLES_DICT_3CLASS,
    save_dir=INDICES_SAVE_PATH_3CLASS,
    load_raw_signals=True,
)
data, labels, samples, indices, labels_key = load_data(**load_options)

# Three classes, matching what the loader reports (heart, adrenal, aorta).
num_classes = 3

class_names=['heart', 'adrenal', 'aorta']
num_classes=3
num_samples=[4, 4, 4]
In progress...
Class aorta, replicate 4/4/4

In [4]:
# Preprocessing options handed to SignalProcessor; their exact semantics are
# defined in src.signal_processing.
processor_options = dict(
    norm_method='nonorm',
    downsample=True,
    downsample_rate=1000,
    pad=True,
    pad_value=2,
    pad_len=100,
)
signal_processor = SignalProcessor(**processor_options)

# NOTE: the loaded arrays are rebound to the processed ones here, so this cell
# must not be re-run without first re-running the loading cell.
data, labels, samples, indices = signal_processor.transform(
    data, labels, samples, indices
)
print(data.shape)

100%|██████████| 39360/39360 [00:00<00:00, 40887.40it/s]


(39360, 100)


### Compute distance matrix

In [5]:
# Compute the full pairwise DTW distance matrix and report the wall-clock time.
# This is the expensive step: the recorded run took roughly 429 minutes.
t0_outfnc = perf_counter()
dist = dist_matrix_parallel(data, win=0.4)
elapsed_minutes = (perf_counter() - t0_outfnc) / 60
# The value is already converted to minutes, so no "s" suffix after the number.
print("Finished computing matrix in %0.3f minutes" % elapsed_minutes)

Computing distances...started processing window (0,0)
Pooled in 4.669s
Computing distances...started processing window (0,13122)
Computing distances...started processing window (0,26244)
Computing distances...started processing window (13122,0)
Computing distances...started processing window (13122,13122)
Computing distances...started processing window (13122,26244)
Computing distances...started processing window (26244,0)
Computing distances...started processing window (26244,13122)
Computing distances...started processing window (26244,26244)
Matrix computed in 25718.007s
Symmetrized in 30.492s
Finished computing matrix in 429.441s minutes


In [6]:
import logging
import os

# The root logger defaults to WARNING, so without this the INFO messages
# below would be silently dropped.
logging.basicConfig(level=logging.INFO)
# basicConfig does nothing if handlers are already installed; set the level
# explicitly so the messages are emitted either way.
logging.getLogger().setLevel(logging.INFO)

# NOTE(review): the name says "nopad" although the SignalProcessor cell above
# uses pad=True -- confirm the intended file name before relying on it.
fname = f"{DTW_SAVE_PATH_3CLASS}nonorm_nosmooth_100_nopad_3class"
# Same file np.save(fname, ...) would create: it appends ".npy" when missing.
saved_path = f"{fname}.npy"

np.save(saved_path, dist)
# Make the saved matrix read-only (equivalent to `chmod 444`).
os.chmod(saved_path, 0o444)

logging.info("Saved everything successfully")
logging.info("You can find the matrix at %s", saved_path)