# CNN-based Experiments with the VTech Dataset

In [4]:
# Standard library
import warnings
import math

# Third party
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras

# Local
import utils



## Load dataset

In [5]:
df = utils.load_dataset(f"s3://cpac/ORIG/VTech/_VTech_merged.csv")

## Dataset information

In [6]:
print(f"Number of workers: {len(df['worker'].unique())}")
for worker, worker_df in df.groupby("worker"):
    print(f"\tWorker {worker}: {len(worker_df['day'].unique())} days"
          f", {len(worker_df['trial'].unique())} trials"
          f", {worker_df['mode'].diff().abs().sum():3.0f} transitions"
          f", {len(worker_df):10,} samples")
print(f"Total number of transitions: {df['mode'].diff().abs().sum()}")


Number of workers: 4
	Worker 1: 1 days, 5 trials,  28 transitions,  1,448,215 samples
	Worker 2: 3 days, 4 trials,  68 transitions,  1,351,866 samples
	Worker 3: 1 days, 2 trials,  59 transitions,    859,697 samples
	Worker 4: 1 days, 6 trials, 110 transitions,  1,646,663 samples
Total number of transitions: 266.0


## Sequencing 

In [7]:
class VTechSeq(keras.utils.Sequence):
    
    def __init__(self, df, history, batch_size):
        self.df = df
        self.history = history
        self.batch_size = batch_size
        self.trials = []
        self.n_seqs = []
        
        for _, group in df.groupby(["worker", "trial"]):
            trial = group[:-(len(group) % history)]
            self.trials.append(trial)
            self.n_seqs.append(len(trial) - history + 1)

    def __len__(self):
        return math.ceil(sum(self.n_seqs) / self.batch_size)

    def __getitem__(self, batch_idx):
        batch_features = []
        batch_labels = []
        for seq_idx in range(self.batch_size * batch_idx , 
                             self.batch_size * (batch_idx + 1)):
            seq_offset = 0
            for trial, n_seq in zip(self.trials, self.n_seqs):
                trial_seq = seq_idx - seq_offset
                if 0 <= trial_seq < n_seq:
                    cut = trial.iloc[trial_seq:trial_seq + self.history]
                    features = cut.loc[:, "orientation_T8_q0":"jointangles_LeftUpper_z"].values
                    batch_features.append(np.swapaxes(features, 0, 1)[:, np.newaxis, :])
                    batch_labels.append(trial.iloc[trial_seq + self.history]["mode"])
                    break
                seq_offset += n_seq
                
        return np.array(batch_features), np.array(batch_labels)
        

In [8]:
seqs = VTechSeq(df, 100, 128)

In [22]:
model = keras.Sequential(
    [
        keras.layers.Input(shape=(seqs[0][0].shape[1:])),
        keras.layers.Flatten(),
        keras.layers.Dense(10, activation="relu", name="layer1"),
        keras.layers.Dense(3, activation="relu", name="layer2"),
        keras.layers.Dense(4, name="layer3"),
    ]
)
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 6600)              0         
_________________________________________________________________
layer1 (Dense)               (None, 2)                 13202     
_________________________________________________________________
layer2 (Dense)               (None, 3)                 9         
_________________________________________________________________
layer3 (Dense)               (None, 4)                 16        
Total params: 13,227
Trainable params: 13,227
Non-trainable params: 0
_________________________________________________________________


In [23]:
normalization = keras.layers.experimental.preprocessing.Normalization()

In [32]:
tf.data.Dataset.from_generator(seqs, (tf.dtypes.float64, tf.dtypes.float64))

TypeError: `generator` must be callable.

In [35]:
144821 / 25 / 60

96.54733333333334