In [1]:
import os
import numpy as np
import soundfile as sf
import pickle 
import IPython.display as ipd


In [2]:
from vae_class import VAE
from sound_generator_class import SoundGenerator

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
# Hop length handed to SoundGenerator when converting spectrograms to audio.
HOP_LENGTH = 256
# Output directories. They are built with os.path.join so the separator matches
# the host OS; the trailing "" keeps the trailing separator, so on Windows the
# values are identical to the previous hard-coded "samples\\...\\" strings.
SAVE_DIR_ORIGINAL = os.path.join("samples", "original", "")
SAVE_DIR_GENERATED = os.path.join("samples", "generated", "")
# Pickle file holding the min/max values, looked up by spectrogram file path.
MIN_MAX_VALUES_PATH = "min_max_values.pkl"
# Root directory that is walked for saved spectrogram arrays.
SPECTROGRAMS_PATH = "./spectrograms"

In [4]:
def select_spectrograms(spectrograms,
                        file_paths,
                        min_max_values,
                        num_spectrograms=2):
    """Randomly pick spectrograms together with their min/max entries.

    Indexes are drawn with ``np.random.choice`` using its default settings,
    i.e. with replacement, so the same spectrogram may be picked more than once.

    Args:
        spectrograms: Array that supports indexing with an array of indexes.
        file_paths: Sequence of paths; ``file_paths[i]`` belongs to
            ``spectrograms[i]``.
        min_max_values: Mapping from file path to that file's min/max entry.
        num_spectrograms: How many samples to draw.

    Returns:
        Tuple ``(sampled_spectrograms, sampled_min_max_values)`` where the
        second item is a list aligned with the first.
    """
    candidate_ids = range(len(spectrograms))
    chosen_ids = np.random.choice(candidate_ids, num_spectrograms)
    chosen_spectrograms = spectrograms[chosen_ids]

    # Resolve index -> file path -> min/max entry, keeping the sampled order.
    chosen_min_max = []
    for idx in chosen_ids:
        chosen_min_max.append(min_max_values[file_paths[idx]])

    return chosen_spectrograms, chosen_min_max


def save_signals(signals, save_dir, sample_rate=22050):
    """Write each signal to ``<save_dir>/<index>.wav``.

    Args:
        signals: Iterable of audio signals; each must expose ``.shape`` and be
            accepted by ``sf.write``.
        save_dir: Target directory. It is created (including parents) when it
            does not exist yet.
        sample_rate: Sample rate passed to ``sf.write``.
    """
    # sf.write fails when the target directory is missing, so create it first.
    # An empty save_dir means "current directory" and must not reach makedirs.
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    for i, signal in enumerate(signals):
        save_path = os.path.join(save_dir, str(i) + ".wav")
        # Kept on purpose: the notebook's recorded output lists these shapes.
        print(signal.shape)
        sf.write(save_path, signal, sample_rate)

        
def load_fsdd(spectrograms_path):
    """Load every saved ``.npy`` spectrogram found under a directory tree.

    Args:
        spectrograms_path: Root directory to walk recursively.

    Returns:
        Tuple ``(x_train, file_paths)``. ``x_train`` stacks the loaded arrays
        and appends a trailing axis of size 1; ``file_paths`` lists the path of
        each loaded file in the same order, built as ``root + "/" + file_name``.
    """
    x_train = []
    file_paths = []
    for root, _, file_names in os.walk(spectrograms_path):
        # os.walk yields names in arbitrary order; sorting within each
        # directory keeps the index -> file mapping stable between runs.
        for file_name in sorted(file_names):
            # Skip stray files (e.g. .DS_Store, notes) that np.load cannot read.
            if not file_name.endswith(".npy"):
                continue
            # Path format is kept as-is: callers use these paths as lookup keys.
            file_path = root + "/" + file_name
            spectrogram = np.load(file_path)  # (n_bins, n_frames)
            x_train.append(spectrogram)
            file_paths.append(file_path)
    x_train = np.array(x_train)
    x_train = x_train[..., np.newaxis]  # -> (n_files, n_bins, n_frames, 1)
    return x_train, file_paths
        



## Generating Sounds


1.   load the VAE model
2.   load all spectrograms and their corresponding min_max values
3.   sample from all spectrograms
4.   convert them back to audio using the functions defined in SoundGenerator
5.   for comparison between the generated and original spectrograms, we will load both of them



In [9]:
# Driver cell: sample a few training spectrograms, turn both the model's
# output and the untouched samples into audio, and save each set as .wav files.
if __name__ == "__main__" : 
  # 1. Load the trained VAE and wrap it in a SoundGenerator.
  # NOTE(review): "sound_gen1" is presumably the saved-model folder - confirm.
  vae = VAE.load("sound_gen1") # load model
  sound_generator = SoundGenerator(vae, HOP_LENGTH)

  # 2. Load the min/max values and all spectrograms.
  # NOTE: pickle.load can execute arbitrary code; only open trusted files.
  with open(MIN_MAX_VALUES_PATH, "rb") as f:
     min_max_values = pickle.load(f)
  
  specs, file_paths = load_fsdd(SPECTROGRAMS_PATH)
  
  # 3. Sample 5 spectrograms (with their min/max values) from the training set.
  sampled_spec, sampled_min_max_values = select_spectrograms(specs, file_paths,min_max_values , 5 )

  # 4. Generate audio from the sampled spectrograms; the second return value
  #    of generate() is not needed here and is discarded.
  signals, _ = sound_generator.generate(sampled_spec, sampled_min_max_values)

  # 5. For the sake of comparison, convert the same sampled spectrograms
  #    directly to audio, without passing them through generate().
  original_signals = sound_generator.convert_spectograms_to_audio(
        sampled_spec, sampled_min_max_values)
  
  # Write both sets to disk; files are named by index within each directory.
  save_signals(signals, SAVE_DIR_GENERATED)
  save_signals(original_signals, SAVE_DIR_ORIGINAL)


(16128,)
(16128,)
(16128,)
(16128,)
(16128,)
(16128,)
(16128,)
(16128,)
(16128,)
(16128,)
