In [37]:
import os
import thinkdsp as tp
import numpy as np
import librosa
from librosa.display import specshow
import matplotlib.pyplot as plt
from pathlib import Path
from tqdm.notebook import tqdm
import multiprocessing as mp
from PIL import Image
import tensorflow as tf
import shutil

In [2]:
import re

def rename(path):
    """Map a clip path to the path of its spectrogram image.

    Every occurrence of ``train_audio`` in ``path`` is replaced by
    ``train_spec_other``, and a trailing ``wav`` is replaced by ``png``.

    Args:
        path: Path of the audio file, as a string.

    Returns:
        The rewritten path string. A path containing neither pattern is
        returned unchanged.
    """
    # Applied in order: first the directory swap, then the extension swap.
    substitutions = (
        (r'train_audio', 'train_spec_other'),
        (r'wav$', 'png'),
    )
    for pattern, replacement in substitutions:
        path = re.sub(pattern, replacement, path)
    return path

def wav_to_png(path):
    """Render the spectrogram of one audio file and save it as a PNG.

    The image is written to ``rename(path)``; missing parent directories are
    created. Axes are hidden and the figure is saved without padding so the
    file contains only the spectrogram itself.

    Args:
        path: Path of the WAV file to convert, as a string.

    Returns:
        None. The result is the PNG file written to disk.
    """
    wave = tp.read_wave(path)
    spectrogram = wave.make_spectrogram(seg_length=1024)

    # Draw on a figure owned by this call. Relying on pyplot's implicit
    # current figure means every later call in the same (worker) process
    # paints on top of the previous spectrograms and keeps them all in memory.
    fig = plt.figure()
    try:
        # high=5000 caps the plotted frequency range (presumably in Hz --
        # confirm against thinkdsp's Spectrogram.plot).
        spectrogram.plot(high=5000)
        plt.axis('off')

        png_path = rename(path)
        out_dir = os.path.dirname(png_path)
        if out_dir:  # os.makedirs('') raises for a bare file name
            os.makedirs(out_dir, exist_ok=True)
        fig.savefig(png_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)

In [3]:
# Every .wav file found recursively under train_audio, as plain string paths.
wavs = [str(wav_file) for wav_file in Path('train_audio').rglob('*.wav')]


In [23]:

# Convert all clips to spectrogram PNGs in parallel, one task per file.
with mp.Pool() as pool:
    # Wrap the *results* in tqdm, not the inputs: with pool.map(f, tqdm(wavs))
    # the bar advances as tasks are handed to the pool rather than as
    # conversions finish. imap_unordered yields each result on completion, so
    # the bar reflects real progress; wav_to_png returns nothing, so the
    # yielded values are simply discarded.
    for _ in tqdm(pool.imap_unordered(wav_to_png, wavs), total=len(wavs)):
        pass

  0%|          | 0/6398 [00:00<?, ?it/s]

In [4]:
# Load the saved Keras model stored at 'my_model'. compile=False loads it
# without compiling, which is enough for the forward passes done below.
pretrained_model = tf.keras.models.load_model(
    'my_model',
    compile=False,
)

In [16]:
# Class labels are the names of the sub-directories of train_audio, sorted so
# that an index always maps to the same label.
# Only real, non-hidden directories count: a stray file (e.g. a README) or a
# hidden folder such as .ipynb_checkpoints would otherwise shift the indices.
# NOTE(review): assumes the model's output indices follow this sorted order --
# confirm against the training code.
class_names = sorted(
    name
    for name in os.listdir('train_audio')
    if not name.startswith('.')
    and os.path.isdir(os.path.join('train_audio', name))
)
class_names

['down', 'go', 'left', 'no', 'right', 'stop', 'up', 'yes']

In [42]:
# Label every clip with the pretrained model and copy the audio file into
# train_audio_for_distill/<predicted class>/, recording the predicted and the
# true class of each clip for the accuracy check.
predicted = []
real = []
for path in tqdm(wavs):
    # Spectrogram image rendered for this clip.
    spec = rename(path)
    with Image.open(spec) as img:
        # Add a leading batch axis and keep the first three channels
        # (drops the alpha channel if the PNG is RGBA).
        img = np.asarray(img)[None, :, :, :3]

    # Index of the highest model output -> class name.
    cls = pretrained_model(img).numpy().argmax()
    cls_name = class_names[cls]
    predicted.append(cls_name)

    # True label = the directory directly below train_audio. Taken from the
    # path components instead of a '/(\w+)/' regex, so it does not depend on
    # the path separator or on the name consisting only of \w characters.
    real_cls = Path(path).relative_to('train_audio').parts[0]
    real.append(real_cls)

    # Clips in different class directories may share a file name; with only
    # the basename, two such clips predicted as the same class would silently
    # overwrite each other. Prefixing the source class keeps names unique and
    # keeps re-runs of this cell idempotent.
    dst = os.path.join('train_audio_for_distill',
                       cls_name,
                       f'{real_cls}_{os.path.basename(path)}')
    os.makedirs(os.path.dirname(dst), exist_ok=True)
    shutil.copy(path, dst)

  0%|          | 0/6398 [00:00<?, ?it/s]

Accuracy of the pretrained model on the training dataset: the fraction of clips whose predicted class matches the class directory the clip came from.

In [43]:
# Share of clips whose predicted label equals the directory-derived label.
(np.array(predicted) == np.array(real)).sum() / len(real)

0.866051891216005