In [1]:
import numpy as np
import tensorflow as tf
from tqdm import tqdm
import os
import shutil
import librosa
import cv2
import pickle

In [2]:
# --- Filesystem locations -------------------------------------------------
# DIR and TARGET_DIR are not referenced in the cells shown here; presumably
# they are used by the audio-extraction steps — verify before removing.
DIR = "UrbanSound8K/audio/"
TARGET_DIR = "city_sounds"
# Root folder that holds one sub-folder of spectrogram images per category.
SPEC_DIR = "spectrograms"
# Folder that receives the pickled dataset splits.
PICKLE_DIR = "urbansounds_pickles"

# --- Labels ---------------------------------------------------------------
# A sample's numeric label is the position of its category in this tuple,
# so the order must not change once a dataset has been generated.
CATEGORIES = (
    "air_conditioner",
    "car_horn",
    "children_playing",
    "dog_bark",
    "drilling",
    "engine_idling",
    "gun_shot",
    "jackhammer",
    "siren",
    "street_music",
)

# --- Tensor / training parameters -----------------------------------------
# Target image dimensions, used both when resizing and when reshaping.
IMG_SIZE = (128, 96)
BATCH_SIZE = 32

In [3]:
# Module-level accumulator: create_training_data() appends one
# [image, label_index] pair to this list for every spectrogram it loads.
training_data = []

def normalize_minmax(x):
    """Rescale ``x`` linearly so its minimum maps to 0 and its maximum to 1.

    Args:
        x: Array-like object exposing ``min()`` and ``max()`` (e.g. a NumPy
            array holding a grayscale image).

    Returns:
        An array of the same shape as ``x``. When every element of ``x`` is
        equal (so ``max == min``) the result is all zeros rather than the
        NaNs that a 0/0 division would produce.

    Raises:
        ValueError: If ``x`` is empty (propagated from ``x.min()``).
    """
    lowest = x.min()
    value_range = x.max() - lowest
    # A constant input has zero range; dividing by 1 instead keeps the
    # (all-zero) shifted values and avoids 0/0 -> NaN.
    divisor = value_range if value_range != 0 else 1
    return (x - lowest) / divisor

def create_training_data():
    """Load every spectrogram under SPEC_DIR into the global ``training_data``.

    For each category in CATEGORIES, every file in ``SPEC_DIR/<category>`` is
    read as a grayscale image, resized, min-max normalised and appended to
    ``training_data`` as ``[image, label_index]``, where ``label_index`` is
    the category's position in CATEGORIES.

    Loading is best effort: a file that cannot be read or processed is
    skipped and recorded. The number of skipped files is printed at the end,
    followed by one line per skipped file.
    """
    failures = []  # (path, exception) for every skipped file
    for label_index, category in enumerate(tqdm(CATEGORIES)):
        category_dir = os.path.join(SPEC_DIR, category)
        # Sorted so the sample order does not depend on the filesystem.
        for file_name in sorted(os.listdir(category_dir)):
            image_path = os.path.join(category_dir, file_name)
            try:
                image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
                if image is None:
                    # cv2.imread reports an unreadable file by returning
                    # None instead of raising.
                    raise ValueError("cv2.imread could not decode the file")

                # Resizing. cv2.resize takes dsize as (width, height) and
                # returns an array of shape (height, width). Passing
                # (IMG_SIZE[1], IMG_SIZE[0]) therefore yields arrays of shape
                # (IMG_SIZE[0], IMG_SIZE[1]), which is the layout the later
                # reshape to (-1, IMG_SIZE[0], IMG_SIZE[1], 1) requires;
                # otherwise that reshape scrambles the pixel order.
                image = cv2.resize(image, (IMG_SIZE[1], IMG_SIZE[0]))

                # Normalization
                image = normalize_minmax(image)

                training_data.append([image, label_index])
            except Exception as exc:
                failures.append((image_path, exc))
    print("Number of errors:", len(failures))
    for image_path, exc in failures:
        print(f"  skipped {image_path}: {exc!r}")

# Fill training_data from the spectrogram folders; prints the error count when done.
create_training_data()

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:22<00:00,  2.26s/it]

Number of errors: 0





In [4]:
# Separate the [image, label] pairs into two parallel lists:
# X holds the images, y the matching label indices.
X = [features for features, _ in training_data]
y = [label for _, label in training_data]

In [5]:
# The samples were loaded category by category, so slicing X / y directly
# would place whole classes into a single split (the first categories in
# train, the last ones in validation). Shuffle images and labels together
# with a fixed seed first, so every split draws from all categories and the
# split is identical on every run.
SPLIT_SEED = 42
TRAIN_END = 6000  # shuffled samples [0, 6000)    -> train
TEST_END = 7366   # shuffled samples [6000, 7366) -> test, the rest -> validation

assert len(X) == len(y), "X and y must contain the same number of samples"

# Build new lists instead of shuffling in place so re-running this cell
# always produces the same split.
shuffled_order = np.random.default_rng(SPLIT_SEED).permutation(len(X))
X_shuffled = [X[i] for i in shuffled_order]
y_shuffled = [y[i] for i in shuffled_order]

X_train = X_shuffled[:TRAIN_END]
y_train = y_shuffled[:TRAIN_END]

X_test = X_shuffled[TRAIN_END:TEST_END]
y_test = y_shuffled[TRAIN_END:TEST_END]

X_val = X_shuffled[TEST_END:]
y_val = y_shuffled[TEST_END:]

In [6]:
# Stack every split into a single array and give the images a trailing
# channel axis (1 for grayscale; it would be 3 for RGB/BGR).
# NOTE(review): reshape only reinterprets the underlying buffer, so this is a
# plain "add a channel axis" only if each image already has shape
# (IMG_SIZE[0], IMG_SIZE[1]) — confirm against the resize step.
sample_shape = (-1, *IMG_SIZE, 1)

X_train = np.reshape(np.array(X_train), sample_shape)
X_test = np.reshape(np.array(X_test), sample_shape)
X_val = np.reshape(np.array(X_val), sample_shape)

# Labels stay one-dimensional: one class index per sample.
y_train = np.array(y_train)
y_test = np.array(y_test)
y_val = np.array(y_val)

In [7]:
# Sanity check: shape of a single training sample after the reshape above.
X_train[0].shape

(128, 96, 1)

In [8]:
# Pickle Dumping: write every split to PICKLE_DIR/<name>.pickle

dataset_dict = {
    "X_train": X_train,
    "X_test": X_test,
    "X_val": X_val,
    "y_train": y_train,
    "y_test": y_test,
    "y_val": y_val,
}

# open(..., "wb") does not create missing parent directories, so make sure
# the output folder exists before writing (a no-op if it is already there).
os.makedirs(PICKLE_DIR, exist_ok=True)

for split_name, split_array in dataset_dict.items():
    pickle_path = os.path.join(PICKLE_DIR, f"{split_name}.pickle")
    with open(pickle_path, "wb") as pickle_file:
        pickle.dump(split_array, pickle_file)