In [3]:
import tensorflow as tf
import tensorflow.keras.utils as tfu
import tensorflow.keras.models as tfm
import tensorflow.keras.layers as tfl
import tensorflow.keras.callbacks as tfc
import tensorflow.keras.regularizers as tfr
import tensorflow.keras.initializers as tfi
import sklearn.model_selection as skm
import numpy as np
import matplotlib.pyplot as plt
import random
import re
import os
import keras_flops as kf
import time

SEED = 42
IMAGE_HEIGHT = 40
IMAGE_WIDTH = 40
SAMPLES_PER_MEASUREMENT = 119
LINES_PER_MEASUREMENT = SAMPLES_PER_MEASUREMENT + 1
IMAGE_WIDTH_HEIGHT_INDEX = IMAGE_WIDTH - 1

np.random.seed(SEED)
random.seed(SEED)
tf.random.set_seed(SEED)

def representative_dataset(data_set):
    for sample in data_set:
        yield [np.expand_dims(sample, 0)]

def collect_model_summary(summary_line, model_dict):
    match = re.match(r"(.*?): ([\d,]+)", summary_line)
    if match:
        match = match.groups()
        model_dict[match[0].replace("params", "parameters")] = int(match[1].replace(',', ''))

def get_stroke_samples(data):
    orientation_samples = np.zeros((SAMPLES_PER_MEASUREMENT, 3))
    stroke_samples = np.zeros((SAMPLES_PER_MEASUREMENT, 2))
    rows_of_samples = [list(map(lambda x: float(x), line.split(','))) for line in data.split('\n') if line]

    for i in range(0, len(rows_of_samples), SAMPLES_PER_MEASUREMENT): 
        measurment = np.array(rows_of_samples[i: i+SAMPLES_PER_MEASUREMENT])
        acceleration_average = np.average(measurment[:, 0:3], axis=0)

        # calcualte orientation
        previous_orientation = np.zeros(3)
        for j, gyro_sample in enumerate(measurment[:, 3:6]):
            orientation_samples[j] = previous_orientation + gyro_sample / SAMPLES_PER_MEASUREMENT
            previous_orientation = orientation_samples[j]     
        orientation_avg = np.average(orientation_samples, axis=0) # average orientation

        # calculate stroke
        acceleration_magnitude = np.sqrt(acceleration_average.dot(acceleration_average.T)) # dot product insted of squaring
        acceleration_magnitude += (acceleration_magnitude < 0.0001) * 0.0001 # prevent division by 0
        normalzied_acceleration = acceleration_average / acceleration_magnitude
        normalized_orientation = orientation_samples - orientation_avg
        stroke_samples[:, 0] = -normalzied_acceleration[1] * normalized_orientation[:, 1] - normalzied_acceleration[2] * normalized_orientation[:, 2]
        stroke_samples[:, 1] =  normalzied_acceleration[1] * normalized_orientation[:, 2] - normalzied_acceleration[2] * normalized_orientation[:, 1]
        yield stroke_samples

def load_as_images(one_hot=True):
    data = ""
    labels = []
    for i, file_name in enumerate(os.listdir("../data")):
        file = open(f"../data/{file_name}", "r")
        file.readline() # skip header
        read_lines = file.read()
        labels += [i] * (read_lines.count("\n") // LINES_PER_MEASUREMENT)
        data += read_lines
        file.close()

    colors = np.linspace(255 - 2 * SAMPLES_PER_MEASUREMENT + 2, 255, SAMPLES_PER_MEASUREMENT) / 255
    images = np.zeros((len(labels), IMAGE_HEIGHT, IMAGE_WIDTH, 1), dtype=np.float32)

    for i, stroke_samples in enumerate(get_stroke_samples(data)): 
        # rasterize stroke
        stroke_samples -= np.min(stroke_samples, axis=0) # make samples in range from 0 to x
        pixels = np.round(stroke_samples * IMAGE_WIDTH_HEIGHT_INDEX / np.max(stroke_samples, axis=0), 0).astype(np.uint8) # normalize samples to the whole image
        image = np.zeros((IMAGE_WIDTH, IMAGE_HEIGHT))
        image[pixels[:, 1], pixels[:, 0]] = colors
        images[i] = image.reshape(IMAGE_WIDTH, IMAGE_HEIGHT, 1).astype(np.float32)

    X_train, X_test, y_train, y_test = skm.train_test_split(images, labels, test_size=0.2, random_state=SEED)
    if one_hot:
        # one-hot encoding of labels
        y_train = tfu.to_categorical(y_train, num_classes=5)
        y_test = tfu.to_categorical(y_test, num_classes=5)

    return X_train, X_test, y_train, y_test

def load_as_array(one_hot=True):
    data = ""
    labels = []
    for i, file_name in enumerate(os.listdir("../data")):
        file = open(f"../data/{file_name}", "r")
        file.readline() # skip header
        read_lines = file.read()
        labels += [i] * (read_lines.count("\n") // LINES_PER_MEASUREMENT)
        data += read_lines
        file.close()

    arrays = np.zeros((len(labels), 2 * SAMPLES_PER_MEASUREMENT), dtype=np.float32)

    for i, stroke_samples in enumerate(get_stroke_samples(data)): 
        stroke_samples -= np.min(stroke_samples, axis=0) # make samples in range from 0 to x
        stroke_samples /= np.max(stroke_samples, axis=0) # normalize values from 0 to 1
        arrays[i] = stroke_samples.reshape(-1)

    X_train, X_test, y_train, y_test = skm.train_test_split(arrays, labels, test_size=0.2, random_state=SEED)
    if one_hot:
        # one-hot encoding of labels
        y_train = tfu.to_categorical(y_train, num_classes=5)
        y_test = tfu.to_categorical(y_test, num_classes=5)

    return X_train, X_test, y_train, y_test


hidden_activation = tf.keras.layers.LeakyReLU(0.1)
dense_model = tfm.Sequential([
        tfl.Dense(units=100, activation=hidden_activation),
        tfl.Dense(units=5, activation="softmax")
])
conv_model = tfm.Sequential([
        tfl.Conv2D(filters=8, kernel_size=(5, 5), activation=hidden_activation, padding="valid"),
        tfl.MaxPool2D(),
        tfl.Conv2D(filters=16, kernel_size=(3, 3), activation=hidden_activation, padding="valid"),
        tfl.MaxPool2D(),
        tfl.Conv2D(filters=32, kernel_size=(3, 3), activation=hidden_activation, padding="valid"),
        tfl.MaxPool2D(),
        tfl.Conv2D(filters=64, kernel_size=(3, 3), activation=hidden_activation, padding="valid"),
        tfl.Conv2D(filters=32, kernel_size=(1, 1), activation=hidden_activation, padding="same"),
        tfl.Conv2D(filters=5, kernel_size=(1, 1), activation="softmax", padding="same"),
        tfl.Reshape([5])
])

X_train, X_test, y_train, y_test = load_as_array()

# get weights for the given seed
dense_model.build(X_train.shape)
weights = dense_model.get_weights()

# get the best number of epochs based on validation data set
dense_model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
history = dense_model.fit(X_train, y_train, epochs=100, validation_split=0.2, batch_size=16, verbose=0,
                          callbacks=[tfc.EarlyStopping(monitor="val_accuracy", patience=3, mode="max", restore_best_weights=False)]).history
dense_model.set_weights(weights)
epochs = len(history["loss"]) - 3

# train on the whole train data set
dense_model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
train_start = time.time()
dense_model.fit(X_train, y_train, epochs=epochs, validation_split=0.0, batch_size=16, verbose=0)
train_time = f"{time.time() - train_start:.2f} s"

# evaluate the results
print(f"Model accuracy on the test data set: {dense_model.evaluate(X_test, y_test, verbose=0)[1] * 100:.2f} %")

# get the summary of the model
params = {}
dense_model.summary(print_fn=lambda x, y=params: collect_model_summary(x, y))
for key, value in params.items():
    print(f"{key}: {value}")
print(f"FLOPS: {kf.get_flops(dense_model, batch_size=1)}")

converter = tf.lite.TFLiteConverter.from_keras_model(dense_model)
tflite_model = converter.convert()
results_file = open(f"dense_model.tflite", "wb")
results_file.write(tflite_model)
results_file.close()
print(f"Model size: {os.path.getsize('dense_model.tflite')}")
os.system(f"rm -f dense_model.tflite")
del tflite_model

converter = tf.lite.TFLiteConverter.from_keras_model(dense_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8
converter.representative_dataset = lambda x=X_train: representative_dataset(x)
tflite_model_opt = converter.convert()
results_file = open(f"dense_model.tflite", "wb")
results_file.write(tflite_model_opt)
results_file.close()
print(f"Optimized model size: {os.path.getsize(f'dense_model.tflite')}")
os.system(f'echo "const unsigned char model[] = {{" > dense_inference/model.h && cat dense_model.tflite | xxd -i >> dense_inference/model.h && echo "}};" >> dense_inference/model.h && rm -f dense_model.tflite')

interpreter = tf.lite.Interpreter(model_content=tflite_model_opt)
interpreter.allocate_tensors()
input_index = interpreter.get_input_details()[0]["index"]
output_index = interpreter.get_output_details()[0]["index"]
input_scale, input_zero_point = interpreter.get_output_details()[0]["quantization"]
accuracy = 0
for i, sample in enumerate(X_test):
    interpreter.set_tensor(input_index, np.expand_dims(sample / input_scale + input_zero_point, 0).astype(np.int8))
    interpreter.invoke()
    accuracy += np.argmax(y_test[i]) == np.argmax(interpreter.get_tensor(output_index)[0]) # rescaling is not needed
print(f"Optimaized model accuracy on the test data set: {accuracy / X_test.shape[0] * 100:.2f} %")
del tflite_model_opt


Model accuracy on the test data set: 93.08 %
Total parameters: 24405
Trainable parameters: 24405
Non-trainable parameters: 0


2022-12-31 19:27:07.612361: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-31 19:27:07.612646: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
2022-12-31 19:27:07.612707: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session
2022-12-31 19:27:07.613015: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-31 19:27:07.613253: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1934] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and 


-max_depth                  10000
-min_bytes                  0
-min_peak_bytes             0
-min_residual_bytes         0
-min_output_bytes           0
-min_micros                 0
-min_accelerator_micros     0
-min_cpu_micros             0
-min_params                 0
-min_float_ops              1
-min_occurrence             0
-step                       -1
-order_by                   float_ops
-account_type_regexes       .*
-start_name_regexes         .*
-trim_name_regexes          
-show_name_regexes          .*
-hide_name_regexes          
-account_displayed_op_only  true
-select                     float_ops
-output                     stdout:


Doc:
scope: The nodes in the model graph are organized by their names, which is hierarchical like filesystem.
flops: Number of float operations. Note: Please read the implementation for the math behind it.

ProfiFLOPS: 48730
le:
node name | # float_ops
_TFProfRoot (--/48.73k flops)
  sequential_4/dense_4/MatMul (47.60k/47.60k flops)
 

INFO:tensorflow:Assets written to: /tmp/tmp42szbaym/assets
2022-12-31 19:27:08.008492: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2022-12-31 19:27:08.008516: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2022-12-31 19:27:08.008628: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/tmp42szbaym
2022-12-31 19:27:08.009231: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2022-12-31 19:27:08.009252: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /tmp/tmp42szbaym
2022-12-31 19:27:08.011436: I tensorflow/cc/saved_model/loader.cc:229] Restoring SavedModel bundle.
2022-12-31 19:27:08.034212: I tensorflow/cc/saved_model/loader.cc:213] Running initialization op on SavedModel bundle at path: /tmp/tmp42szbaym
2022-12-31 19:27:08.039608: I tensorflow/cc/saved_model/loader.cc:305] SavedModel

Model size: 99488
INFO:tensorflow:Assets written to: /tmp/tmpt94ahz75/assets


INFO:tensorflow:Assets written to: /tmp/tmpt94ahz75/assets


Optimized model size: 26928
Optimaized model accuracy on the test data set: 93.08 %


2022-12-31 19:27:08.432008: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2022-12-31 19:27:08.432032: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2022-12-31 19:27:08.432145: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/tmpt94ahz75
2022-12-31 19:27:08.432729: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2022-12-31 19:27:08.432745: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /tmp/tmpt94ahz75
2022-12-31 19:27:08.434704: I tensorflow/cc/saved_model/loader.cc:229] Restoring SavedModel bundle.
2022-12-31 19:27:08.457240: I tensorflow/cc/saved_model/loader.cc:213] Running initialization op on SavedModel bundle at path: /tmp/tmpt94ahz75
2022-12-31 19:27:08.462832: I tensorflow/cc/saved_model/loader.cc:305] SavedModel load for tags { serve }; Status: success: OK. Took 30686 m