In [2]:
import matplotlib.pyplot as plt
from scipy import signal
from scipy.io import wavfile
import os
import numpy as np
import pandas as pd
import wavio

In [3]:
# set up file paths
train_audio_path = 'audio/'
train_pict_path = 'train/'
train_map_csv = 'train_map.csv'

#test_pict_path = 'test/'
#test_audio_path = '../data/test/audio/'
#test_map_csv = '../data/test_map.csv'



In [4]:

# set up paths
if not os.path.exists(train_pict_path):
    os.makedirs(train_pict_path)

# get all subfolder list
subfolderlist = []
for x in os.listdir(train_audio_path):
    if os.path.isdir(train_audio_path + '/' + x):
        subfolderlist.append(x)
        if not os.path.exists(train_pict_path + '/' + x):
            os.makedirs(train_pict_path + '/' + x)



In [5]:

# obtain a file map with path and target for training iterators
file_target = []
for x in subfolderlist:
    temp = [['/'.join([train_audio_path[:-1], x, y]), x] for
            y in os.listdir(train_audio_path + x) if '.wav' in y]
    file_target += temp

train_file_map = pd.DataFrame(file_target, columns=['path', 'target'])
print(">>> Generated train_file_map dataframe:")
print(train_file_map.head())

# generate a test_file_map
#test_path = [[x, 0] for x in os.listdir(test_audio_path) if '.wav' in x]
#test_file_map = pd.DataFrame(test_path, columns=['path', 'target'])
#print(">>> Generated test_file_map dataframe:")
#print(test_file_map.head())



>>> Generated train_file_map dataframe:
                               path target
0  audio/nine/c44d2a58_nohash_0.wav   nine
1  audio/nine/36050ef3_nohash_2.wav   nine
2  audio/nine/86648261_nohash_0.wav   nine
3  audio/nine/0e17f595_nohash_0.wav   nine
4  audio/nine/8dc26a15_nohash_0.wav   nine


In [6]:
# Function that changes .wav file into a spectrogram numbers


def log_specgram(audio, sample_rate, window_size=20, step_size=10, eps=1e-10):
    nperseg = int(round(window_size * sample_rate / 1e3))
    noverlap = int(round(step_size * sample_rate / 1e3))
    freqs, _, spec = signal.spectrogram(audio,
                                        fs=sample_rate,
                                        window='hann',
                                        nperseg=nperseg,
                                        noverlap=noverlap,
                                        detrend=False)
    return freqs, np.log(spec.T.astype(np.float32) + eps)



In [7]:
def wav2img(wav_path, targetdir='', figsize=(4, 4)):
    """
    takes in wave file path
    and the fig size. Default 4,4 will make images 288 x 288
    """
    fig = plt.figure(figsize=figsize)
    # use soundfile library to read in the wave files
    samplerate, test_sound = wavfile.read(wav_path)
    _, spectrogram = log_specgram(test_sound, samplerate)
    # create output path
    output_file = wav_path.split('/')[-1].split('.wav')[0]
    output_file = targetdir + '/' + output_file
    # plt.imshow(spectrogram.T, aspect='auto', origin='lower')
    plt.imsave('%s.png' % output_file, spectrogram)
    plt.close()
    return (output_file + '.png')




In [8]:

# generate train pictures
train_file_map['pict'] = ''
print(">>> Creating spectrograms for train .wav files")
for i in range(train_file_map.shape[0]):
    targetdir = train_pict_path + train_file_map.target[i]
    pict_path = wav2img(train_file_map.path[i], targetdir=targetdir)
    train_file_map.pict[i] = pict_path
    if (i + 1) % 500 == 0:
        print(">>> Generating %ith spectrogram..." % (i + 1))
        print(">>> File map %ith row:" % (i + 1))
        print(repr(train_file_map.iloc[i, ]))




>>> Creating spectrograms for train .wav files
>>> Generating 500th spectrogram...
>>> File map 500th row:
path      audio/nine/c22d3f18_nohash_0.wav
target                                nine
pict      train/nine/c22d3f18_nohash_0.png
Name: 499, dtype: object
>>> Generating 1000th spectrogram...
>>> File map 1000th row:
path      audio/nine/7211390b_nohash_3.wav
target                                nine
pict      train/nine/7211390b_nohash_3.png
Name: 999, dtype: object
>>> Generating 1500th spectrogram...
>>> File map 1500th row:
path      audio/nine/56eb74ae_nohash_1.wav
target                                nine
pict      train/nine/56eb74ae_nohash_1.png
Name: 1499, dtype: object
>>> Generating 2000th spectrogram...
>>> File map 2000th row:
path      audio/nine/32561e9e_nohash_0.wav
target                                nine
pict      train/nine/32561e9e_nohash_0.png
Name: 1999, dtype: object
>>> Generating 2500th spectrogram...
>>> File map 2500th row:
path      audio/go/ab5b211a

>>> Generating 19500th spectrogram...
>>> File map 19500th row:
path      audio/off/3bfd30e6_nohash_0.wav
target                                off
pict      train/off/3bfd30e6_nohash_0.png
Name: 19499, dtype: object
>>> Generating 20000th spectrogram...
>>> File map 20000th row:
path      audio/off/84bf12ff_nohash_1.wav
target                                off
pict      train/off/84bf12ff_nohash_1.png
Name: 19999, dtype: object
>>> Generating 20500th spectrogram...
>>> File map 20500th row:
path      audio/on/9f869f70_nohash_1.wav
target                                on
pict      train/on/9f869f70_nohash_1.png
Name: 20499, dtype: object
>>> Generating 21000th spectrogram...
>>> File map 21000th row:
path      audio/on/1ecfb537_nohash_0.wav
target                                on
pict      train/on/1ecfb537_nohash_0.png
Name: 20999, dtype: object
>>> Generating 21500th spectrogram...
>>> File map 21500th row:
path      audio/on/ced4e2a1_nohash_0.wav
target                           

  samplerate, test_sound = wavfile.read(wav_path)


>>> Generating 35000th spectrogram...
>>> File map 35000th row:
path      audio/right/8e05039f_nohash_3.wav
target                                right
pict      train/right/8e05039f_nohash_3.png
Name: 34999, dtype: object
>>> Generating 35500th spectrogram...
>>> File map 35500th row:
path      audio/right/eee5e541_nohash_1.wav
target                                right
pict      train/right/eee5e541_nohash_1.png
Name: 35499, dtype: object
>>> Generating 36000th spectrogram...
>>> File map 36000th row:
path      audio/right/1e4064b8_nohash_2.wav
target                                right
pict      train/right/1e4064b8_nohash_2.png
Name: 35999, dtype: object
>>> Generating 36500th spectrogram...
>>> File map 36500th row:
path      audio/right/becd5a53_nohash_0.wav
target                                right
pict      train/right/becd5a53_nohash_0.png
Name: 36499, dtype: object
>>> Generating 37000th spectrogram...
>>> File map 37000th row:
path      audio/right/7ea032f3_nohash_4.wav


>>> Generating 54000th spectrogram...
>>> File map 54000th row:
path      audio/three/528f9242_nohash_0.wav
target                                three
pict      train/three/528f9242_nohash_0.png
Name: 53999, dtype: object
>>> Generating 54500th spectrogram...
>>> File map 54500th row:
path      audio/left/f852895b_nohash_1.wav
target                                left
pict      train/left/f852895b_nohash_1.png
Name: 54499, dtype: object
>>> Generating 55000th spectrogram...
>>> File map 55000th row:
path      audio/left/106a6183_nohash_3.wav
target                                left
pict      train/left/106a6183_nohash_3.png
Name: 54999, dtype: object
>>> Generating 55500th spectrogram...
>>> File map 55500th row:
path      audio/left/ef2578c0_nohash_1.wav
target                                left
pict      train/left/ef2578c0_nohash_1.png
Name: 55499, dtype: object
>>> Generating 56000th spectrogram...
>>> File map 56000th row:
path      audio/left/b59fa113_nohash_0.wav
target    

In [10]:


i = 0
targetdir = train_pict_path + train_file_map.target[i]
targetdir
pict_path = wav2img(train_file_map.path[i], targetdir=targetdir)
pict_path
train_file_map.pict[i] = pict_path
print(repr(train_file_map.iloc[i,]))




path      audio/nine/c44d2a58_nohash_0.wav
target                                nine
pict      train/nine/c44d2a58_nohash_0.png
Name: 0, dtype: object


In [11]:

# saving train map file
train_file_map.to_csv(train_map_csv, index=True)

