In [10]:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from tensorflow import keras
from os import listdir, path
import numpy as np
from collections import defaultdict
import datetime
import random

random.seed(42) # Keep the order stable everytime shuffling the files while creating training datasets

## Global variables

In [11]:
seq_length  = 36 # This will be used to keep the fixed input size for the first CNN layer
dim         = 6  # Number of datapoints in a single reading accX,accY,accZ,gyrX,gyrY,gyrZ
num_classes = 10 # Number of output classes [0,9] 

## Sequence Padding
#### When collecting sequence data, individual samples have different lengths. Since the input data for a convolutional neural network  must be a single tensor, samples need to be padded. The sequence are padded at the beginning and at the end with neighboring values.

In [12]:
def padding(data):
    padded_data = []
    noise_level = [ 20, 20, 20, 0.2, 0.2, 0.2 ]
    
    tmp_data = (np.random.rand(seq_length, dim) - 0.5) * noise_level + data[0]
    tmp_data[(seq_length - min(len(data), seq_length)):] = data[:min(len(data), seq_length)]
    padded_data.append(tmp_data)

    tmp_data = (np.random.rand(seq_length, dim) - 0.5) * noise_level + data[-1]
    tmp_data[:min(len(data), seq_length)] = data[:min(len(data), seq_length)]
    
    padded_data.append(tmp_data)
    return padded_data

## Convert to TensorFlow dataset, keeps data and labels together


In [13]:
def build_dataset(data, label):
    # Add 2 padding, initialize data and label
    padded_num = 2
    length = len(data) * padded_num
    features = np.zeros((length, seq_length, dim))
    labels = np.zeros(length)
    # Get padding for train, valid and test
    for idx, (data, label) in enumerate(zip(data, label)):
        padded_data = padding(data)
        for num in range(padded_num):
            features[padded_num * idx + num] = padded_data[num]
            labels[padded_num * idx + num] = label
    # Turn into tf.data.Dataset
    dataset = tf.data.Dataset.from_tensor_slices((features, labels.astype("int32")))
    return length, dataset

## Time Warping

In [14]:
def time_warping(molecule, denominator, data):
  tmp_data = [[0 for i in range(len(data[0]))] for j in range((int(len(data) / molecule) - 1) * denominator)]
    
  for i in range(int(len(data) / molecule) - 1):
    for j in range(len(data[i])):
      for k in range(denominator):
        tmp_data[denominator * i + k][j] = (data[molecule * i + k][j] * (denominator - k) 
                                            + data[molecule * i + k + 1][j] * k) / denominator
  return tmp_data


## Data augmentation

In [15]:
def augment_data(original_data, original_label):
  new_data = []
  new_label = []
  for idx, (data, label) in enumerate(zip(original_data, original_label)):  # pylint: disable=unused-variable
    # Original data
    new_data.append(data)
    new_label.append(label)
    # Shift Sequence
    for num in range(5):  # pylint: disable=unused-variable
      new_data.append((np.array(data, dtype=np.float32) +
                       (random.random() - 0.5) * 200).tolist())
      new_label.append(label)
    # Add Random noise
    tmp_data = [[0 for i in range(len(data[0]))] for j in range(len(data))]
    for num in range(5):
      for i in range(len(tmp_data)):
        for j in range(len(tmp_data[i])):
          tmp_data[i][j] = data[i][j] + 5 * random.random()
      new_data.append(tmp_data)
      new_label.append(label)
    # Time warping
    fractions = [(3, 2), (5, 3), (2, 3), (3, 4), (9, 5), (6, 5), (4, 5)]
    for molecule, denominator in fractions:
      new_data.append(time_warping(molecule, denominator, data))
      new_label.append(label)
    # Movement amplification
    for molecule, denominator in fractions:
      new_data.append(
          (np.array(data, dtype=np.float32) * molecule / denominator).tolist())
      new_label.append(label)
  return new_data, new_label

## Load data from files

In [16]:
def load_data(data_type, files):
    data   = []
    labels = []
    random.shuffle(files)
   
    for file in files:
        with open(file) as f:
            label = path.splitext(file)[0][-1]
            labels.append(label)
            readings = []
            for line in f:
                reading = line.strip().split(',')
                readings.append([float(i) for i in reading[0:6]])

            data.append(readings)
            
    if data_type == 'train':
        data, labels = augment_data(data, labels)
    
    return build_dataset(data, labels)

## Prepare training, validation, and test datasets

In [17]:
files_path = defaultdict(list)
dir = './data'
for filename in listdir(dir):
    if filename.endswith('.csv'):
        digit = path.splitext(filename)[0][-1]
        files_path[digit].append(path.join(dir, filename))

train_files      = []
validation_files = []
test_files       = []

for digit in files_path:
    random.shuffle(files_path[digit])
    
    train_split = int(len(files_path[digit]) * 0.6) # 60%
    validation_split = train_split + int(len(files_path[digit]) * 0.2) # 20%

    train_files += files_path[digit][:train_split]
    validation_files += files_path[digit][train_split:validation_split]
    # remaining 20%
    test_files += files_path[digit][validation_split:]

train_length, train_data = load_data('train', train_files)
validation_length, validation_data = load_data('validation', validation_files)
test_length, test_data = load_data('test', test_files )

print('train_length={} validation_length={} test_length{}'.format(train_length, validation_length, test_length))

train_length=22200 validation_length=290 test_length312


## Build a sequential model

In [18]:
model = tf.keras.Sequential([
      tf.keras.layers.Conv2D(8, (3, 3), padding="same", activation="relu", input_shape=(seq_length, dim, 1)),
      tf.keras.layers.Conv2D(8, (3, 3), padding="same", activation="relu"),
      tf.keras.layers.MaxPool2D((2, 2)),
      tf.keras.layers.Dropout(0.1),
      tf.keras.layers.Conv2D(8, (3, 3), padding="same", activation="relu"),
      tf.keras.layers.MaxPool2D((2, 2), padding="same"),
      tf.keras.layers.Dropout(0.1),
      tf.keras.layers.Conv2D(16, (3, 3), padding="same", activation="relu"),
      tf.keras.layers.MaxPool2D((2, 2), padding="same"),
      tf.keras.layers.Dropout(0.1),
      tf.keras.layers.Conv2D(16, (3, 3), padding="same", activation="relu"),
      tf.keras.layers.Flatten(),
      tf.keras.layers.Dense(64, activation="relu"),
      tf.keras.layers.Dropout(0.2),
      tf.keras.layers.Dense(32, activation="relu"),
      tf.keras.layers.Dropout(0.2),
      tf.keras.layers.Dense(num_classes, activation="softmax")
  ])

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


## Compile and start training

In [None]:
epochs = 100
batch_size = 64
steps_per_epoch=1000

model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

def reshape_function(data, label):
  reshaped_data = tf.reshape(data, [-1, dim, 1])
  return reshaped_data, label

train_data = train_data.map(reshape_function)
validation_data = validation_data.map(reshape_function)

train_data = train_data.batch(batch_size).repeat()
validation_data = validation_data.batch(batch_size)

logdir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

# Uncomment the ines below if you like to see how training proceeds
# %load_ext tensorboard
# %tensorboard --logdir logdir

model.fit(
  train_data,
  epochs=epochs,
  validation_data=validation_data,
  steps_per_epoch=steps_per_epoch,
  validation_steps=int((validation_length - 1) / batch_size + 1),
  callbacks=[tensorboard_callback])

Epoch 1/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.1390 - loss: 2.4257 - val_accuracy: 0.1759 - val_loss: 2.1556
Epoch 2/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.2164 - loss: 2.0605 - val_accuracy: 0.3414 - val_loss: 1.7988
Epoch 3/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.3531 - loss: 1.8045 - val_accuracy: 0.4966 - val_loss: 1.4581
Epoch 4/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.4550 - loss: 1.5519 - val_accuracy: 0.6379 - val_loss: 1.1091
Epoch 5/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.5416 - loss: 1.3229 - val_accuracy: 0.6483 - val_loss: 0.9694
Epoch 6/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.6055 - loss: 1.1488 - val_accuracy: 0.6517 - val_loss: 0.9628
Epoch 7/10

## Evaluate the trained model on test dataset

In [None]:
test_data = test_data.map(reshape_function)
test_labels = np.zeros(test_length)

# There is no easy function to get the labels back from the tf.data.Dataset :(
# Need to iterate over dataset
idx = 0
for data, label in test_data:
    test_labels[idx] = label.numpy()
    idx += 1
    
test_data = test_data.batch(batch_size)

loss, acc = model.evaluate(test_data)
pred = np.argmax(model.predict(test_data), axis=1)

# Create a confusion matrix to see how model predicts
confusion = tf.math.confusion_matrix(labels=tf.constant(test_labels), predictions=tf.constant(pred), num_classes=num_classes)
print(confusion)

## Convert model to TFLite format 
### Note: Currently quantized TFLite format does not work with TFLite Micro library

In [24]:
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
open("model.tflite", "wb").write(tflite_model)

# Convert the model to the TensorFlow Lite format with quantization
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
tflite_model = converter.convert()
open("model_quantized.tflite", "wb").write(tflite_model)


INFO:tensorflow:Assets written to: /var/folders/rs/75wzgcz56_j8r096v36m0p8c0000gn/T/tmpla5l6k2r/assets


INFO:tensorflow:Assets written to: /var/folders/rs/75wzgcz56_j8r096v36m0p8c0000gn/T/tmpla5l6k2r/assets


Saved artifact at '/var/folders/rs/75wzgcz56_j8r096v36m0p8c0000gn/T/tmpla5l6k2r'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 36, 6, 1), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 10), dtype=tf.float32, name=None)
Captures:
  5760941328: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5760940944: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5760941712: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5760941904: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5760941520: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5760941136: TensorSpec(shape=(), dtype=tf.resource, name=None)
  6299287824: TensorSpec(shape=(), dtype=tf.resource, name=None)
  6299289168: TensorSpec(shape=(), dtype=tf.resource, name=None)
  6299288784: TensorSpec(shape=(), dtype=tf.resource, name=None)
  6299289744: TensorSpec(shape=(), dtype=tf.resource, name=None)
  6299289552: TensorSpec

W0000 00:00:1762335915.265519   79763 tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
W0000 00:00:1762335915.265527   79763 tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2025-11-05 15:15:15.265593: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/rs/75wzgcz56_j8r096v36m0p8c0000gn/T/tmpla5l6k2r
2025-11-05 15:15:15.266016: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-11-05 15:15:15.266019: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /var/folders/rs/75wzgcz56_j8r096v36m0p8c0000gn/T/tmpla5l6k2r
2025-11-05 15:15:15.270344: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-11-05 15:15:15.293931: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /var/folders/rs/75wzgcz56_j8r096v36m0p8c0000gn/T/tmpla5l6k2r
2025-11-05 15:15:15.301128: I tensorflow/cc/saved_model/loader.cc:

INFO:tensorflow:Assets written to: /var/folders/rs/75wzgcz56_j8r096v36m0p8c0000gn/T/tmpcmhi7mnt/assets


INFO:tensorflow:Assets written to: /var/folders/rs/75wzgcz56_j8r096v36m0p8c0000gn/T/tmpcmhi7mnt/assets


Saved artifact at '/var/folders/rs/75wzgcz56_j8r096v36m0p8c0000gn/T/tmpcmhi7mnt'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 36, 6, 1), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 10), dtype=tf.float32, name=None)
Captures:
  5760941328: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5760940944: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5760941712: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5760941904: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5760941520: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5760941136: TensorSpec(shape=(), dtype=tf.resource, name=None)
  6299287824: TensorSpec(shape=(), dtype=tf.resource, name=None)
  6299289168: TensorSpec(shape=(), dtype=tf.resource, name=None)
  6299288784: TensorSpec(shape=(), dtype=tf.resource, name=None)
  6299289744: TensorSpec(shape=(), dtype=tf.resource, name=None)
  6299289552: TensorSpec

W0000 00:00:1762335915.571492   79763 tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
W0000 00:00:1762335915.571500   79763 tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2025-11-05 15:15:15.571573: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/rs/75wzgcz56_j8r096v36m0p8c0000gn/T/tmpcmhi7mnt
2025-11-05 15:15:15.572179: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-11-05 15:15:15.572183: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /var/folders/rs/75wzgcz56_j8r096v36m0p8c0000gn/T/tmpcmhi7mnt
2025-11-05 15:15:15.576575: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-11-05 15:15:15.600365: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /var/folders/rs/75wzgcz56_j8r096v36m0p8c0000gn/T/tmpcmhi7mnt
2025-11-05 15:15:15.607799: I tensorflow/cc/saved_model/loader.cc:

24928