In [1]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"]="1"

import tensorflow as tf
import tensorflow_model_optimization as tfmot
from sklearn.model_selection import ParameterGrid

from time import time
from tqdm import tqdm
from functools import partial
from preprocessing import *
import utilities

2023-04-27 20:16:26.210358: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-04-27 20:16:26.233355: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-04-27 20:16:26.897007: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-04-27 20:16:26.897061: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory


In [2]:
! rm -rf formatted_data
! rm -rf complete_test_data
! rm -rf complete_training_data
! rm -rf complete_validation_data

In [3]:
! rm -rf saved_models
! rm -rf tflite_models
! rm -rf zipped_models
! rm -rf results/*

In [4]:
crop_time = 3 #seconds
window_length = 2 #seconds
overlap_size = 1 #seconds
strategy = "CropAndWindow"

In [5]:
df = DatasetFormatter(crop_time=crop_time, window_length=window_length, overlap_size=overlap_size)
df.format_dataset(strategy=strategy)

Loading audio files informations...
Found 399 audio files.
Cropping audio files to 3 seconds before and after passing time. Saving to ./formatted_data/


  input_audiofile = siw.read(audio_path)
100%|██████████| 399/399 [00:02<00:00, 181.17it/s]


In [6]:
! mkdir complete_training_data
! mkdir complete_validation_data
! mkdir complete_test_data

! mv formatted_data/formatted_VWPassat/* complete_test_data
! mv formatted_data/formatted_CitroenC4Picasso/* complete_validation_data
! mv formatted_data/*/* complete_training_data

In [7]:
! rm -rf saved_models/*
! rm -rf tflite_models/*

zsh:1: no matches found: saved_models/*
zsh:1: no matches found: tflite_models/*


In [8]:
PARAMS = {
    'frame_length_in_s': [0.04], 
    'frame_step_in_s': [0.02], 
    'num_mel_bins': [20], 
    'lower_frequency': [1000],
    'upper_frequency': [7000], 
    'batch_size': [5], 
    'epochs': [5], 
    'initial_learning_rate': [0.01], 
    'end_learning_rate': [1.e-5], 
    'num_mfccs_features' : [-1], 
    'alpha': [0.15], 
    'num_hidden_layers':[5]
}

configurations = {"configurations": []}
my_configs = ParameterGrid(PARAMS)
for config in my_configs:
    configurations["configurations"].append(config)

print("Possible configurations found: {}".format(len(configurations["configurations"])))

Possible configurations found: 1


In [9]:
header = ','.join(k for k in PARAMS.keys()) + "," + ','.join([
    "accuracy", "avg_preprocessing_latency", "avg_model_latency", \
         "median_total_latency", "model_size", "compressed_model_size", "model_id"
         ])

utilities._log_header_to_csv(filename=f"{strategy}_cnn_results.csv", header=header)

train_file_ds = tf.data.Dataset.list_files(['complete_training_data/*.wav'])
validation_file_ds = tf.data.Dataset.list_files(['complete_validation_data/*.wav'])
test_file_ds = tf.data.Dataset.list_files(['complete_test_data/*.wav'])

for idx in tqdm(range(len(configurations["configurations"]))):

    config = configurations["configurations"][idx]

    print("Using configuration: \n", config)

    MEL_LOG_ARGS = {
        'frame_length_in_s': config['frame_length_in_s'],
        'frame_step_in_s': config['frame_step_in_s'],
        'num_mel_bins': config['num_mel_bins'],
        'lower_frequency': config['lower_frequency'],
        'upper_frequency': config['upper_frequency']
    }
    TRAINING_ARGS= {
        'batch_size': config['batch_size'],
        'epochs': config['epochs'],
        'initial_learning_rate': config['initial_learning_rate'],
        'end_learning_rate': config['end_learning_rate']
    }

    batch_size = TRAINING_ARGS['batch_size']
    epoch = TRAINING_ARGS['epochs']

    get_frozen_log_mel_spectrogram = partial(get_log_mel_spectrogram, **MEL_LOG_ARGS)
    train_mel_ds = train_file_ds.map(get_frozen_log_mel_spectrogram)

    for spectrogram, label in train_mel_ds.take(1):
        SHAPE = spectrogram.shape

    def preprocess_with_resized_mel(filename):
        log_mel_spectrogram, label = get_frozen_log_mel_spectrogram(filename)
        log_mel_spectrogram.set_shape(SHAPE)
        mfccs = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrogram)[..., :]
        mfccs = tf.expand_dims(mfccs, -1)
        mfccs = tf.image.resize(mfccs, [32, 32])
        label_id = tf.argmax(label == LABELS)

        return mfccs, label_id

    train_ds = train_file_ds.map(preprocess_with_resized_mel).batch(batch_size)
    validation_ds = validation_file_ds.map(preprocess_with_resized_mel).batch(batch_size)

    for example_batch, example_labels in train_ds.take(1):
        print(example_batch.shape)
        print(example_labels)

    model = utilities.get_cnn(SHAPE=example_batch.shape[1:], alpha=config['alpha'], num_hidden_layers=config['num_hidden_layers'])

    model_for_pruning, callbacks = utilities.compile_pruning_model(
        model = model,
        epoch = epoch,
        dim = len(train_ds),
        i_lr = TRAINING_ARGS['initial_learning_rate'],
        e_lr = TRAINING_ARGS['end_learning_rate']
    )

    history = model_for_pruning.fit(train_ds, epochs=epoch, validation_data=validation_ds, callbacks=callbacks)

    # computing statistics
    traning_loss, \
        training_accuracy, \
            val_loss, \
                val_accuracy = utilities.get_model_statistics(history = history)

    MODEL_NAME, ZIPPED_MODEL_NAME = utilities.convert_zip_save_model(model=model_for_pruning, idx=idx, network_type="cnn")

    # performing inference
    interpreter = tf.lite.Interpreter(model_path=f'tflite_models/{MODEL_NAME}.tflite')
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    avg_preprocessing_latency = 0.0
    avg_model_latency = 0.0
    latencies = []
    accuracy = 0.0

    start_map = time()
    mapped_test_ds = test_file_ds.map(get_frozen_log_mel_spectrogram)
    end_map =  time()
    avg_map_time = (end_map - start_map)/len(test_file_ds)

    for log_mel_spectrogram, true_label in mapped_test_ds:

        start_preprocess = time()

        mfccs = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrogram)[..., :config['num_mfccs_features']]
        mfccs = tf.expand_dims(mfccs, 0)
        mfccs = tf.expand_dims(mfccs, -1)
        mfccs = tf.image.resize(mfccs, [32, 32])

        end_preprocess = time()
        
        interpreter.set_tensor(input_details[0]['index'], mfccs) 
        interpreter.invoke()
        output = interpreter.get_tensor(output_details[0]['index'])

        end_inference = time()

        top_index = np.argmax(output[0])
        predicted_label = LABELS[top_index]

        accuracy += true_label.numpy().decode() == predicted_label
        avg_preprocessing_latency += avg_map_time + (end_preprocess - start_preprocess)
        avg_model_latency += end_inference - end_preprocess
        latencies.append(end_inference - start_preprocess)

    accuracy /= len(mapped_test_ds)
    avg_preprocessing_latency /= len(mapped_test_ds)
    avg_model_latency /= len(mapped_test_ds)
    median_total_latency = np.median(latencies)

    model_size = os.path.getsize(f'tflite_models/{MODEL_NAME}.tflite')
    compressed_model_size = os.path.getsize(f"zipped_models/{ZIPPED_MODEL_NAME}.zip")

    content = f"{config['frame_length_in_s']},\
        {config['frame_step_in_s']},{config['num_mel_bins']},\
            {config['lower_frequency']},{config['upper_frequency']},\
                {config['batch_size']},{config['epochs']},{config['initial_learning_rate']},\
                    {config['end_learning_rate']},{config['num_mfccs_features']},\
                        {config['alpha']},{config['num_hidden_layers']},\
                            {100 * accuracy:.3f},{1000 * avg_preprocessing_latency:.1f},\
                            {1000 * avg_model_latency:.1f},{1000 * median_total_latency:.1f},\
                                {model_size / 2 ** 10:.1f},{compressed_model_size / 2 ** 10:.1f},{idx}\n"

    utilities._log_output_to_csv(filename=f"{strategy}_cnn_results.csv", content=content)

2023-04-27 20:16:32.460752: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-04-27 20:16:32.460952: W tensorflow/stream_executor/cuda/cuda_driver.cc:263] failed call to cuInit: UNKNOWN ERROR (303)
  0%|          | 0/1 [00:00<?, ?it/s]

Using configuration: 
 {'alpha': 0.15, 'batch_size': 5, 'end_learning_rate': 1e-05, 'epochs': 5, 'frame_length_in_s': 0.04, 'frame_step_in_s': 0.02, 'initial_learning_rate': 0.01, 'lower_frequency': 1000, 'num_hidden_layers': 5, 'num_mel_bins': 20, 'num_mfccs_features': -1, 'upper_frequency': 7000}
(5, 32, 32, 1)
tf.Tensor([2 2 1 1 2], shape=(5,), dtype=int64)
Epoch 1/5
 4/69 [>.............................] - ETA: 1:08 - loss: 1.2495 - sparse_categorical_accuracy: 0.1500

  0%|          | 0/1 [00:13<?, ?it/s]


KeyboardInterrupt: 