## Imports

In [1]:
from pathlib import Path
from tqdm.auto import tqdm
import numpy as np
import tensorflow as tf

## Mount Drive

In [2]:
def is_running_on_colab():
    """Report whether the ``google.colab`` package can be imported.

    Returns:
        bool: True when ``import google.colab`` succeeds, False when it
        raises ``ImportError``.
    """
    try:
        import google.colab  # noqa: F401 -- imported only to probe availability
    except ImportError:
        return False
    return True

# Run the Colab check once and keep the result; the drive-mounting cell branches on this flag.
ON_COLAB = is_running_on_colab()
ON_COLAB  # bare expression so the notebook displays the flag's value

True

In [3]:
# Pick the folder that holds the intermediate array files read by the cells below.
if not ON_COLAB:
  # Not on Colab: use the relative ../data/intermediate folder.
  intermediate_folder = Path('..', 'data', 'intermediate')
else:
  # On Colab: mount Google Drive first, then point at a folder inside it.
  from google.colab import drive
  drive.mount('/content/gdrive')
  # Alternative Drive location, kept for reference:
  # intermediate_folder = Path('/content/gdrive/MyDrive/Temp/Speech recognition project')
  intermediate_folder = Path('/content/gdrive/MyDrive/Colab Notebooks/Speech recognition')

Mounted at /content/gdrive


## Read data and convert to TFRecords

In [None]:
# X_train = np.load(intermediate_folder / 'train_main_1_sec_audio.npy')
# X_train.shape

In [None]:
# X_val = np.load(intermediate_folder / 'val_main_1_sec_audio.npy').astype(float)
# X_val.shape

In [None]:
# X_test = np.load(intermediate_folder / 'test_main_1_sec_audio.npy').astype(float)
# X_test.shape

In [4]:
def convert_file(file_from):
  """Convert an array file into a TFRecord file with one record per row.

  The array is loaded from ``intermediate_folder / file_from``. The records are
  written next to it, under the same name with its suffix replaced by
  ``.tfrecords``. Each element obtained by iterating over the loaded array is
  serialized with ``tf.io.serialize_tensor`` and stored under the ``'data'``
  key of a ``tf.train.Example``.

  Args:
    file_from: Name (``str`` or ``Path``) of the source file, relative to
      ``intermediate_folder``.

  Returns:
    None. The result is the ``.tfrecords`` file written to disk.

  Note:
    The whole array is loaded into memory before writing starts.
  """
  source_path = intermediate_folder / file_from
  # Swap the extension with pathlib rather than slicing off three characters,
  # so the output name is correct for any suffix length and for Path arguments.
  # For '*.npy' names this produces the same '*.tfrecords' name as before.
  target_path = Path(source_path).with_suffix('.tfrecords')

  # Load .npy file
  print(f'About to load file {file_from}')
  data = np.load(source_path)
  print(f'File loaded: {file_from}')

  def _bytes_feature(value):
    """Return a bytes-list Feature holding `value` serialized as a tensor."""
    # .numpy() extracts the raw bytes from the scalar string tensor that
    # serialize_tensor returns.
    serialized = tf.io.serialize_tensor(value).numpy()
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[serialized]))

  def serialize_example(row):
    """Return the serialized tf.train.Example for a single row."""
    feature = {
        'data': _bytes_feature(row)
    }
    # Create a Features message using tf.train.Example.
    example_proto = tf.train.Example(features=tf.train.Features(feature=feature))
    return example_proto.SerializeToString()

  # Write the `tf.train.Example` observations to the file; the context manager
  # closes the writer even if serialization fails part-way through.
  with tf.io.TFRecordWriter(str(target_path)) as writer:
    for row in tqdm(data):
      writer.write(serialize_example(row))


In [None]:
# Writes train_main_1_sec_audio_stft_scipy.tfrecords into intermediate_folder.
convert_file('train_main_1_sec_audio_stft_scipy.npy')

About to load file train_main_1_sec_audio_stft_scipy.npy
File loaded: train_main_1_sec_audio_stft_scipy.npy


  0%|          | 0/33566 [00:00<?, ?it/s]

In [None]:
# Writes val_main_1_sec_audio_stft_scipy.tfrecords into intermediate_folder.
convert_file('val_main_1_sec_audio_stft_scipy.npy')

About to load file val_main_1_sec_audio_stft_scipy.npy
File loaded: val_main_1_sec_audio_stft_scipy.npy


  0%|          | 0/4619 [00:00<?, ?it/s]

In [None]:
# Writes test_main_1_sec_audio_stft_scipy.tfrecords into intermediate_folder.
convert_file('test_main_1_sec_audio_stft_scipy.npy')

About to load file test_main_1_sec_audio_stft_scipy.npy
File loaded: test_main_1_sec_audio_stft_scipy.npy


  0%|          | 0/4689 [00:00<?, ?it/s]

In [5]:
# Writes train_main_1_sec_audio_stft_librosa.tfrecords into intermediate_folder.
convert_file('train_main_1_sec_audio_stft_librosa.npy')

About to load file train_main_1_sec_audio_stft_librosa.npy
File loaded: train_main_1_sec_audio_stft_librosa.npy


  0%|          | 0/33566 [00:00<?, ?it/s]

In [None]:
# Writes val_main_1_sec_audio_stft_librosa.tfrecords into intermediate_folder.
convert_file('val_main_1_sec_audio_stft_librosa.npy')

About to load file val_main_1_sec_audio_stft_librosa.npy
File loaded: val_main_1_sec_audio_stft_librosa.npy


  0%|          | 0/4619 [00:00<?, ?it/s]

In [None]:
# Writes test_main_1_sec_audio_stft_librosa.tfrecords into intermediate_folder.
convert_file('test_main_1_sec_audio_stft_librosa.npy')

About to load file test_main_1_sec_audio_stft_librosa.npy
File loaded: test_main_1_sec_audio_stft_librosa.npy


  0%|          | 0/4689 [00:00<?, ?it/s]