In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import tensorflow as tf
from preprocessing import *
import tensorflow_model_optimization as tfmot
from sklearn.model_selection import ParameterGrid
from time import time
import zipfile
import numpy as np

In [2]:
def get_model(SHAPE, alpha):
    """Build the three-block convolutional classifier used in the grid search.

    Each block is Conv2D -> BatchNormalization -> ReLU; the blocks are followed by
    global average pooling, a dense layer with one unit per entry of ``LABELS`` and
    a softmax, so the model outputs class probabilities (not logits).

    Args:
        SHAPE: shape of a single input example, without the batch dimension.
        alpha: width multiplier applied to the base of 128 filters per convolution.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    # 128 * alpha is a float for fractional alpha (e.g. 19.2). The filter count must
    # be an integer, so truncate explicitly instead of relying on the Keras version
    # in use to coerce (or reject) a float.
    n_filters = int(128 * alpha)

    model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=SHAPE),
    tf.keras.layers.Conv2D(filters=n_filters, kernel_size=[3, 3], strides=[2, 2],
        use_bias=False, padding='valid'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.Conv2D(filters=n_filters, kernel_size=[3, 3], strides=[1, 1],
        use_bias=False, padding='same'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.Conv2D(filters=n_filters, kernel_size=[3, 3], strides=[1, 1],
        use_bias=False, padding='same'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(units=len(LABELS)),
    tf.keras.layers.Softmax()
    ])

    return model

In [3]:
# Search space for the grid search. Each key maps to the list of candidate values
# to try; the trailing comment on each line keeps the alternative candidate lists
# that are currently disabled.
PARAMS = {
    'frame_length_in_s': [0.02, 0.04],   # alt: [0.02, 0.04, 0.06, 0.08]
    'frame_step_in_s': [0.01, 0.02],     # alt: [0.005, 0.01, 0.015, 0.02]
    'num_mel_bins': [20, 40],            # alt: [20, 30, 40]
    'lower_frequency': [20],             # alt: [10, 20, 30]
    'upper_frequency': [4000],           # alt: [3000, 4000, 5000]
    'batch_size': [5, 10],               # alt: [10, 20, 30]
    'epochs': [10, 15],                  # alt: [10, 20, 30]
    'initial_learning_rate': [0.01],     # alt: [0.005, 0.01, 0.02]
    'end_learning_rate': [1.e-5],        # alt: [1.e-3, 1.e-5, 1.e-6]
    'num_mfccs_features': [13, -1],      # alt: [10, 15, -1]
    'alpha': [0.15, 0.25],               # alt: [0.15, 0.25, 0.33]
}

# Expand the search space into one dict per combination of candidate values.
my_configs = ParameterGrid(PARAMS)
configurations = {"configurations": list(my_configs)}

print("Possible configurations found: {}".format(len(configurations["configurations"])))

Possible configurations found: 64


In [4]:
def _log_header_to_csv(filename : str, header : str) -> bool:
    completed = False
    try:
        with open(filename, "w") as header_fp:
            header_fp.write(header + "\n")
        completed = True
    except Exception as e:
        print(e.format_exc())
    finally:
        return completed

def _log_output_to_csv(filename : str, content : str) -> bool:
    completed = False
    try:
        with open(filename, "a") as log_fp:
            log_fp.write(content + "\n")
        completed = True
    except Exception as e:
        print(e.format_exc())
    finally:
        return completed

In [5]:
from functools import partial


# CSV schema: one column per hyper-parameter (in PARAMS key order) followed by the
# metrics measured for each configuration.
header = ','.join([
    *PARAMS.keys(),
    "accuracy", "avg_preprocessing_latency", "avg_model_latency",
    "median_total_latency", "model_size", "compressed_model_size",
])

# Start a fresh results file; opening in "w" mode discards any previous content.
_log_header_to_csv(filename="hw2_log_final.csv", header=header)

# File-name datasets for the three splits; every split is a flat folder of .wav files.
train_file_ds = tf.data.Dataset.list_files(['complete_training_data/*.wav'])
validation_file_ds = tf.data.Dataset.list_files(['complete_validation_data/*.wav'])
test_file_ds = tf.data.Dataset.list_files(['complete_test_data/*.wav'])

# Grid search: for every configuration, train a magnitude-pruned model, export it to
# TFLite (plain and zipped), measure accuracy / latency / size on the test files and
# append one row to the CSV log.
for idx, config in enumerate(tqdm(configurations["configurations"])):
    print(config)

    MEL_LOG_ARGS = {
        'frame_length_in_s': config['frame_length_in_s'],
        'frame_step_in_s': config['frame_step_in_s'],
        'num_mel_bins': config['num_mel_bins'],
        'lower_frequency': config['lower_frequency'],
        'upper_frequency': config['upper_frequency']
    }
    TRAINING_ARGS= {
        'batch_size': config['batch_size'],
        'epochs': config['epochs'],
        'initial_learning_rate': config['initial_learning_rate'],
        'end_learning_rate': config['end_learning_rate']
    }

    batch_size = TRAINING_ARGS['batch_size']
    epoch = TRAINING_ARGS['epochs']
    # Upper bound of the MFCC slice, shared by training and inference preprocessing.
    # NOTE(review): a value of -1 yields `[..., :-1]`, i.e. it drops the last
    # coefficient rather than keeping all of them -- confirm this is intended.
    num_mfccs_features = config['num_mfccs_features']

    # Bind the spectrogram parameters once so the same callable is used for the
    # training pipeline and for the per-file inference loop below.
    get_frozen_log_mel_spectrogram = partial(get_log_mel_spectrogram, **MEL_LOG_ARGS)
    train_mel_ds = train_file_ds.map(get_frozen_log_mel_spectrogram)
    # Read one example to learn the spectrogram shape for this configuration.
    for spectrogram, label in train_mel_ds.take(1):
        SHAPE = spectrogram.shape

    def preprocess_with_resized_mel(filename):
        """Map a file name to a (32x32x1 MFCC image, integer label id) pair."""
        log_mel_spectrogram, label = get_frozen_log_mel_spectrogram(filename)
        log_mel_spectrogram.set_shape(SHAPE)
        # Slice exactly as the inference loop does, so the model is trained on the
        # same features it is later evaluated on.
        mfccs = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrogram)[..., :num_mfccs_features]
        mfccs = tf.expand_dims(mfccs, -1)
        mfccs = tf.image.resize(mfccs, [32, 32])
        label_id = tf.argmax(label == LABELS)

        return mfccs, label_id

    train_ds = train_file_ds.map(preprocess_with_resized_mel).batch(batch_size)
    validation_ds = validation_file_ds.map(preprocess_with_resized_mel).batch(batch_size)
    test_ds = test_file_ds.map(preprocess_with_resized_mel).batch(batch_size)

    # Peek at one batch: shows the input shape and provides it to the model builder.
    for example_batch, example_labels in train_ds.take(1):
        print(example_batch.shape)
        print(example_labels)

    model = get_model(SHAPE=example_batch.shape[1:], alpha=config['alpha'])

    prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

    # Pruning starts after the first 20% of the training steps and ramps the
    # sparsity from 20% to 70% until the last training step.
    begin_step = int(len(train_ds) * epoch * 0.2)
    end_step = int(len(train_ds) * epoch)
    final_sparsity=0.70
    pruning_params = {
        'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(
            initial_sparsity=0.20,
            final_sparsity=final_sparsity,
            begin_step=begin_step,
            end_step=end_step
        )
    }

    model_for_pruning = prune_low_magnitude(model, **pruning_params)

    # The model ends with a Softmax layer, hence from_logits=False.
    loss = tf.losses.SparseCategoricalCrossentropy(from_logits=False)

    initial_learning_rate = TRAINING_ARGS['initial_learning_rate']
    end_learning_rate = TRAINING_ARGS['end_learning_rate']

    # Learning rate decays from the initial to the end value over the whole training.
    linear_decay = tf.keras.optimizers.schedules.PolynomialDecay(
        initial_learning_rate=initial_learning_rate,
        end_learning_rate=end_learning_rate,
        decay_steps=len(train_ds) * epoch,
    )
    optimizer = tf.optimizers.Adam(learning_rate=linear_decay)
    metrics = [tf.metrics.SparseCategoricalAccuracy()]
    callbacks = [tfmot.sparsity.keras.UpdatePruningStep()]
    model_for_pruning.compile(loss=loss, optimizer=optimizer, metrics=metrics)

    history = model_for_pruning.fit(train_ds, epochs=epoch, validation_data=validation_ds, callbacks=callbacks)
    # computing statistics (values of the last epoch)
    # test_loss, test_accuracy = model_for_pruning.evaluate(test_ds)
    training_loss = history.history['loss'][-1]
    training_accuracy = history.history['sparse_categorical_accuracy'][-1]
    val_loss = history.history['val_loss'][-1]
    val_accuracy = history.history['val_sparse_categorical_accuracy'][-1]
    # The integer timestamp doubles as the model name in all export folders.
    timestamp = int(time())

    # Remove the pruning wrappers before exporting the model.
    model_for_export = tfmot.sparsity.keras.strip_pruning(model_for_pruning)
    saved_model_dir = f'./saved_models/{timestamp}'
    os.makedirs(saved_model_dir, exist_ok=True)
    model_for_export.save(saved_model_dir)

    # model conversion to tf-lite format
    MODEL_NAME = timestamp
    ZIPPED_MODEL_NAME = MODEL_NAME
    converter = tf.lite.TFLiteConverter.from_saved_model(f'./saved_models/{MODEL_NAME}')
    tflite_model = converter.convert()

    # saving tf-lite formatted model
    tflite_models_dir = './tflite_models'
    os.makedirs(tflite_models_dir, exist_ok=True)
    tflite_model_name = os.path.join(tflite_models_dir, f'{MODEL_NAME}.tflite')
    with open(tflite_model_name, 'wb') as fp:
        fp.write(tflite_model)

    # save the zipped model
    os.makedirs("./zipped_models", exist_ok=True)
    with zipfile.ZipFile(f'{os.path.join("./zipped_models",str(ZIPPED_MODEL_NAME))}.zip', 'w', compression=zipfile.ZIP_DEFLATED) as f:
        f.write(tflite_model_name)

    # performing inference
    interpreter = tf.lite.Interpreter(model_path=f'tflite_models/{MODEL_NAME}.tflite')
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    avg_preprocessing_latency = 0.0
    avg_model_latency = 0.0
    latencies = []
    accuracy = 0.0
    filenames = glob("complete_test_data/*.wav")
    # One file at a time, so preprocessing and model latency are timed per example.
    for fname in filenames:
        start_preprocess = time()

        log_mel_spectrogram, true_label = get_frozen_log_mel_spectrogram(filename=fname)

        mfccs = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrogram)[..., :num_mfccs_features]
        mfccs = tf.expand_dims(mfccs, 0)   # add the batch dimension
        mfccs = tf.expand_dims(mfccs, -1)  # add the channel dimension
        mfccs = tf.image.resize(mfccs, [32, 32])

        end_preprocess = time()

        interpreter.set_tensor(input_details[0]['index'], mfccs)
        interpreter.invoke()
        output = interpreter.get_tensor(output_details[0]['index'])

        end_inference = time()

        top_index = np.argmax(output[0])
        predicted_label = LABELS[top_index]

        accuracy += true_label == predicted_label
        avg_preprocessing_latency += end_preprocess - start_preprocess
        avg_model_latency += end_inference - end_preprocess
        latencies.append(end_inference - start_preprocess)

    accuracy /= len(filenames)
    avg_preprocessing_latency /= len(filenames)
    avg_model_latency /= len(filenames)
    median_total_latency = np.median(latencies)

    model_size = os.path.getsize(f'tflite_models/{MODEL_NAME}.tflite')
    compressed_model_size = os.path.getsize(f"zipped_models/{ZIPPED_MODEL_NAME}.zip")

    # Build the CSV row with a join: backslash continuations inside a string literal
    # would embed each continuation line's indentation in the fields. Parameter
    # columns follow PARAMS key order, the same order used to build the header.
    # Units: accuracy in percent, latencies in milliseconds, sizes in KiB.
    row_fields = [str(config[key]) for key in PARAMS] + [
        f"{100 * accuracy:.3f}",
        f"{1000 * avg_preprocessing_latency:.1f}",
        f"{1000 * avg_model_latency:.1f}",
        f"{1000 * median_total_latency:.1f}",
        f"{model_size / 2 ** 10:.1f}",
        f"{compressed_model_size / 2 ** 10:.1f}",
    ]
    # No trailing newline here: _log_output_to_csv appends one itself.
    content = ','.join(row_fields)

    _log_output_to_csv(filename="hw2_log_final.csv", content=content)

  0%|          | 0/64 [00:00<?, ?it/s]

{'alpha': 0.15, 'batch_size': 20, 'end_learning_rate': 1e-05, 'epochs': 15, 'frame_length_in_s': 0.02, 'frame_step_in_s': 0.01, 'initial_learning_rate': 0.01, 'lower_frequency': 20, 'num_mel_bins': 20, 'num_mfccs_features': 13, 'upper_frequency': 4000}
(20, 32, 32, 1)
tf.Tensor([1 1 2 2 0 2 1 1 2 1 2 2 1 1 2 2 1 1 1 1], shape=(20,), dtype=int64)
Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089
Epoch 1/15
Epoch 2/15
Epoch 3/15

  0%|          | 0/64 [01:21<?, ?it/s]


KeyboardInterrupt: 