## Spectrogram autoencoder

In [1]:
from data_discovery_helpers.PreProcessedData import PreProcessedData
import numpy as np
import os
from typing import Tuple
from pydantic import BaseModel
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Flatten, Conv1D, MaxPooling1D
from tensorflow.keras.optimizers import SGD, Adam

In [2]:
def load_participant_data(p_id: str, feat_path: str):
    feats = np.load(os.path.join(feat_path,f'{p_id}_feat.npy'))
    spec = np.load(os.path.join(feat_path,f'{p_id}_spec.npy'))
    
    return feats,spec

In [29]:
p_ids = ['sub-01', 'sub-02', 'sub-03', 'sub-04', 'sub-05', 'sub-06', 'sub-07', 'sub-08', 'sub-09', 'sub-10']
DEFAULT_FEATURES_PATH = r'./features'
feats, spec = load_participant_data(p_ids[0],DEFAULT_FEATURES_PATH)
print(f"Feat shape: {feats.shape}")
print(f"Spec shape: {spec.shape}")

Feat shape: (29985, 1143)
Spec shape: (29985, 23)


In [31]:
def make_timeseries_instances(feats:np.ndarray, spec: np.ndarray,input_window_size:int,output_window_size:int)->Tuple[np.ndarray]:
    """Make the input and the output for the NN.
    
    timeseries: timeseries vector
    input_window_size: from have many days we want to predict the future
    output_window_size: how many days we want to predict in the future
    """
    
    X = [] #input data for the NN
    Y = [] #output data for the NN   
    
    t = feats.shape[0]
    
    for idx in range(t-(input_window_size+output_window_size)):
        x = np.asarray(feats[idx:idx+input_window_size])
        y = np.asarray(spec[(idx+input_window_size):(idx+input_window_size+output_window_size)])
        
        X.append(x)
        Y.append(y[0])
        
    # normalization    
    return (np.asarray(X),np.asarray(Y)) 

In [32]:
print(f"Feat shape: {feats.shape}")
print(f"Spec shape: {spec.shape}")
X,Y = make_timeseries_instances(feats,spec,12,1)
print(f"X shape: {X.shape}")
print(f"Y shape: {Y.shape}")
print(X)

Feat shape: (29985, 1143)
Spec shape: (29985, 23)
X shape: (29972, 12, 1143)
Y shape: (29972, 23)
[[[2.22035436 2.73808935 2.85690161 ... 2.28213407 2.23484694 2.50348484]
  [2.39475793 2.70724067 3.01786318 ... 2.22221548 2.12541089 2.29082713]
  [3.09624351 2.53434615 3.6592359  ... 1.71764827 1.90184825 2.20471851]
  ...
  [3.77647414 4.51945198 2.42160424 ... 2.42756598 3.03646729 2.93255736]
  [3.57035723 4.58016079 2.75856102 ... 3.25909295 3.51799849 3.50280557]
  [3.5842496  4.28712111 3.15903961 ... 3.61000989 3.78155424 3.44510973]]

 [[2.39475793 2.70724067 3.01786318 ... 2.22221548 2.12541089 2.29082713]
  [3.09624351 2.53434615 3.6592359  ... 1.71764827 1.90184825 2.20471851]
  [4.02233416 2.83700801 4.48158457 ... 2.00716342 2.23391262 2.45765985]
  ...
  [3.57035723 4.58016079 2.75856102 ... 3.25909295 3.51799849 3.50280557]
  [3.5842496  4.28712111 3.15903961 ... 3.61000989 3.78155424 3.44510973]
  [2.72810797 4.10928657 3.67309008 ... 3.38809066 3.81723014 3.22110608]]

In [33]:
def split_data(X:np.ndarray,Y:np.ndarray,valid_split: float = 0.15, test_split: float = 0.15):
    nb_samples = X.shape[0]
    valid_size = int(nb_samples*(1-test_split-valid_split))
    test_size = int(nb_samples*(1-test_split))
    X_train, Y_train = X[:valid_size], Y[:valid_size]
    X_valid, Y_valid = X[valid_size:test_size], Y[valid_size:test_size]
    X_test, Y_test   = X[test_size:], Y[test_size:]
    
    return {"train": [X_train, Y_train],
            "valid": [X_valid,Y_valid],
            "test": [X_test,Y_test]}

In [34]:
ds_dict = split_data(X,Y)
X_train,Y_train=ds_dict["train"]
X_valid,Y_valid=ds_dict["valid"]
X_test,Y_test=ds_dict["test"]

print(X_train.shape)
print(Y_train.shape)

print(X_valid.shape)
print(Y_valid.shape)

print(X_test.shape)
print(Y_test.shape)

(20980, 12, 1143)
(20980, 23)
(4496, 12, 1143)
(4496, 23)
(4496, 12, 1143)
(4496, 23)


In [35]:
def make_1d_convnet(window_size, filter_length,nb_filter,nb_input_series=1, nb_outputs=1,lr=0.0001):

    model = Sequential()
 
    model.add(Conv1D(filters=nb_filter, kernel_size=filter_length, activation='relu', input_shape=(window_size, nb_input_series)))
    model.add(MaxPooling1D())
    model.add(Flatten())
    model.add(Dense(nb_outputs, activation='linear'))

    optimizer=Adam(lr=lr) 
    
    model.compile(loss='mse', optimizer=optimizer, metrics=['mae'])
    return model

In [36]:
class ArchConfig(BaseModel):
    window_size: int
    filter_length: int
    nb_filter: int
    lr: float

In [37]:
class TrainConfig(BaseModel):
    valid_split: float
    test_split: float
    epochs: int
    batch_size: int

In [38]:
def train(feats,spec,arch_cfg,train_cfg):
    input_channel_num = feats.shape[-1]
    output_channel_num = spec.shape[-1]
    print(input_channel_num)
    print(output_channel_num)
    X,Y = make_timeseries_instances(feats,spec,12,1)
    ds = split_data(X, Y,train_cfg.valid_split,train_cfg.test_split)
    
    X_train,Y_train = ds['train']
    model = make_1d_convnet(window_size=arch_cfg.window_size, filter_length=arch_cfg.filter_length,nb_filter=arch_cfg.nb_filter,nb_input_series=input_channel_num, nb_outputs=output_channel_num,
                            lr=arch_cfg.lr)
    #model.summary()
    
    model.fit(X_train, Y_train, epochs=train_cfg.epochs, batch_size=train_cfg.batch_size, validation_data=ds['valid'], verbose=2)
    
    return model

In [None]:
arch_cfg = ArchConfig(window_size=12,filter_length=4,nb_filter=4,lr=0.0003)
train_cfg = TrainConfig(test_split=0.15,valid_split=0.15,epochs=30,batch_size=32)
model=train(feats,spec,arch_cfg,train_cfg)

1143
23
Epoch 1/30
656/656 - 14s - loss: 25.3964 - mae: 4.6090 - val_loss: 23.2193 - val_mae: 4.4345 - 14s/epoch - 21ms/step
Epoch 2/30
656/656 - 7s - loss: 23.4905 - mae: 4.4107 - val_loss: 21.5617 - val_mae: 4.2436 - 7s/epoch - 10ms/step
Epoch 3/30
656/656 - 4s - loss: 21.8529 - mae: 4.2211 - val_loss: 19.9971 - val_mae: 4.0553 - 4s/epoch - 6ms/step
Epoch 4/30
656/656 - 4s - loss: 20.3060 - mae: 4.0342 - val_loss: 18.5205 - val_mae: 3.8692 - 4s/epoch - 6ms/step
Epoch 5/30
656/656 - 4s - loss: 18.8439 - mae: 3.8487 - val_loss: 17.1267 - val_mae: 3.6850 - 4s/epoch - 6ms/step
Epoch 6/30
656/656 - 4s - loss: 17.4615 - mae: 3.6655 - val_loss: 15.8091 - val_mae: 3.5021 - 4s/epoch - 6ms/step
Epoch 7/30
656/656 - 4s - loss: 16.1554 - mae: 3.4831 - val_loss: 14.5671 - val_mae: 3.3205 - 4s/epoch - 7ms/step
Epoch 8/30
656/656 - 4s - loss: 14.9239 - mae: 3.3021 - val_loss: 13.3979 - val_mae: 3.1401 - 4s/epoch - 6ms/step
Epoch 9/30
656/656 - 4s - loss: 13.7640 - mae: 3.1224 - val_loss: 12.2999 - 