# Dataset

In [None]:
# import libraries
import warnings
warnings.filterwarnings("ignore")
import os
import time
import librosa
import zipfile
import numpy as np
import pandas as pd
import librosa.display
import IPython.display
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from PIL import Image
import seaborn as sns
import cv2

In [None]:
# Download the UrbanSound8K archive from Zenodo, extract it into the current
# working directory, then delete the downloaded archive.
!wget https://zenodo.org/record/1203745/files/UrbanSound8K.tar.gz -O urban8k.tgz
!tar -xzf urban8k.tgz
!rm urban8k.tgz

In [None]:
# Mount Google Drive at /content/drive (only works inside Google Colab).
# NOTE(review): nothing in the visible cells reads from the mounted drive —
# confirm this mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Load the UrbanSound8K metadata table and display it
metadata_csv = "/content/UrbanSound8K/metadata/UrbanSound8K.csv"
soundCSV = pd.read_csv(metadata_csv)
soundCSV

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class
0,100032-3-0-0.wav,100032,0.000000,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.500000,62.500000,1,5,2,children_playing
2,100263-2-0-121.wav,100263,60.500000,64.500000,1,5,2,children_playing
3,100263-2-0-126.wav,100263,63.000000,67.000000,1,5,2,children_playing
4,100263-2-0-137.wav,100263,68.500000,72.500000,1,5,2,children_playing
...,...,...,...,...,...,...,...,...
8727,99812-1-2-0.wav,99812,159.522205,163.522205,2,7,1,car_horn
8728,99812-1-3-0.wav,99812,181.142431,183.284976,2,7,1,car_horn
8729,99812-1-4-0.wav,99812,242.691902,246.197885,2,7,1,car_horn
8730,99812-1-5-0.wav,99812,253.209850,255.741948,2,7,1,car_horn


## Preprocessing

In [None]:
# Function that builds the spectrogram of one audio file
def createSpectrogram(audio):
    """Return the log-scaled (dB) mel spectrogram of an audio file.

    Parameters
    ----------
    audio : str or path-like
        Path of the audio file; passed unchanged to ``librosa.load``.

    Returns
    -------
    np.ndarray
        Mel spectrogram in decibels, relative to the spectrogram's own
        maximum (``ref=np.max``).
    """
    y, sr = librosa.load(audio)
    spec = librosa.feature.melspectrogram(y=y, sr=sr)
    # melspectrogram returns a *power* spectrogram by default (power=2.0),
    # so the matching conversion is power_to_db. amplitude_to_db would treat
    # the values as amplitudes and square them again, doubling the dB values.
    spec_conv = librosa.power_to_db(spec, ref=np.max)
    return spec_conv

In [None]:
# Convert every wav file to a spectrogram and save it as a PNG image
data = []
label = []
vaw_name = []
path = "/content/UrbanSound8K/audio/"

# Iterate over the three needed columns directly instead of calling
# soundCSV.iloc[i] three times per row.
rows = zip(soundCSV["fold"], soundCSV["slice_file_name"], soundCSV["classID"])
for fold_no, file, class_id in rows:
    filename = f"{path}fold{fold_no}/{file}"
    spectrogram = createSpectrogram(filename)

    label.append(class_id)
    vaw_name.append(file)
    data.append(spectrogram)

    # Name the image "<stem>.png" rather than "<name>.wav.png": the cell that
    # reloads the images looks them up as slice_file_name.replace("wav", "png").
    png_name = os.path.splitext(file)[0] + ".png"

    # Draw each spectrogram on its own figure and close it afterwards.
    # Drawing on the implicit pyplot figure stacks every spectrogram on the
    # same axes, so each save gets slower and memory keeps growing.
    fig, ax = plt.subplots()
    librosa.display.specshow(spectrogram, ax=ax)
    fig.savefig(png_name, bbox_inches='tight', pad_inches=0)
    plt.close(fig)

In [None]:

def turnGray(img):
    """Load an image file from disk as a single-channel grayscale array.

    Parameters
    ----------
    img : str
        Path of the image file to read.

    Returns
    -------
    np.ndarray
        The decoded grayscale image.

    Raises
    ------
    FileNotFoundError
        If OpenCV could not read the file (missing or undecodable).
    """
    image = cv2.imread(img, cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports failure by returning None instead of raising; fail
    # here with the offending path rather than later inside cv2.resize.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {img}")
    return image

In [None]:
# Load every saved spectrogram PNG as a grayscale image
gray_spec = [
    turnGray(wav_name.replace("wav", "png"))
    for wav_name in soundCSV["slice_file_name"]
]

In [None]:
# Inspect the shape of the first grayscale image
gray_spec[0].shape

(374, 500)

In [None]:
# Compare the original image size with the target size used for all images
print('Original Dimensions : ',gray_spec[0].shape)

image_size = 128
dim = (image_size, image_size)

# resize image
resized = cv2.resize(gray_spec[0], dim, interpolation = cv2.INTER_CUBIC)

print('Resized Dimensions : ',resized.shape)

# cv2.imshow/waitKey need a GUI window, which notebook kernels (and Colab in
# particular) do not provide, so render the image inline with matplotlib.
# vmin/vmax pin the 0-255 range so the image is not contrast-stretched.
fig, ax = plt.subplots()
ax.imshow(resized, cmap="gray", vmin=0, vmax=255)
ax.set_title("Resized image")
ax.axis("off")
plt.show()

Original Dimensions :  (374, 500)
Resized Dimensions :  (128, 128)


In [None]:
# Function that resizes a grayscale image
def resize(img, size=None):
    """Resize an image with bicubic interpolation.

    Parameters
    ----------
    img : np.ndarray
        Image to resize.
    size : tuple of int, optional
        Target size, passed to ``cv2.resize`` as its ``dsize`` argument.
        Defaults to the notebook-level ``dim`` when omitted, so existing
        ``resize(img)`` calls behave exactly as before.

    Returns
    -------
    np.ndarray
        The resized image.
    """
    target = dim if size is None else size
    return cv2.resize(img, target, interpolation=cv2.INTER_CUBIC)

In [None]:
# Resize every grayscale spectrogram to the common target size
resized_grays = [resize(gray_spec[idx]) for idx in range(len(soundCSV))]

In [None]:
# Inspect the shape of the first resized image
resized_grays[0].shape

(128, 128)

In [None]:
# Display the first resized image.
# cv2.imshow/waitKey need a GUI window, which notebook kernels (and Colab in
# particular) do not provide, so render the image inline with matplotlib.
fig, ax = plt.subplots()
ax.imshow(resized_grays[0], cmap="gray", vmin=0, vmax=255)
ax.set_title("Resized image")
ax.axis("off")
plt.show()

In [None]:
# Function that min-max normalizes a grayscale image with OpenCV
def normalize(img, alpha=0, beta=200):
    """Linearly rescale an image so its values span ``[alpha, beta]``.

    Parameters
    ----------
    img : np.ndarray
        Image to normalize.
    alpha : int or float, optional
        Lower bound of the output range (default 0).
    beta : int or float, optional
        Upper bound of the output range (default 200, the value this
        notebook has always used).

    Returns
    -------
    np.ndarray
        The rescaled image returned by ``cv2.normalize``.
    """
    return cv2.normalize(img, None, alpha=alpha, beta=beta,
                         norm_type=cv2.NORM_MINMAX)

In [None]:
# Show one image before and after normalization, side by side.
# Reuse normalize() so this preview always matches what the pipeline applies.
image_norm = normalize(resized_grays[0])

# cv2.imshow/waitKey need a GUI window, which notebook kernels (and Colab in
# particular) do not provide, so render the images inline with matplotlib.
# vmin/vmax pin the 0-255 range so the brightness difference stays visible.
fig, (ax_orig, ax_norm) = plt.subplots(1, 2)
ax_orig.imshow(resized_grays[0], cmap="gray", vmin=0, vmax=255)
ax_orig.set_title('original Image')
ax_orig.axis("off")
ax_norm.imshow(image_norm, cmap="gray", vmin=0, vmax=255)
ax_norm.set_title('Normalized Image')
ax_norm.axis("off")
plt.show()

In [None]:
# Apply normalize() to every resized grayscale image
normalized_grays = [normalize(resized_grays[idx]) for idx in range(len(soundCSV))]

In [None]:
# Sanity check: number of normalized images
len(normalized_grays)

8732

In [None]:
# Flatten every image to a 1-D vector and scale pixel values to the 0-1 range.
# normalize() stretches each image to 0-200, so the divisor has to be 200;
# dividing by 255 capped every value at 200/255 (about 0.784) instead of 1.
NORMALIZED_MAX = 200.0
shaped_data = [
    normalized_grays[idx].reshape(-1) / NORMALIZED_MAX
    for idx in range(len(soundCSV))
]

In [None]:
# Inspect the first flattened, scaled image vector
shaped_data[0]

array([0.78431373, 0.        , 0.        , ..., 0.78431373, 0.78431373,
       0.78431373])

In [None]:
# Build the feature table: one row per image, one column per pixel
data = pd.DataFrame(data=shaped_data)
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16374,16375,16376,16377,16378,16379,16380,16381,16382,16383
0,0.784314,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314
1,0.784314,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314
2,0.784314,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314
3,0.784314,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314
4,0.784314,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8727,0.784314,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314
8728,0.784314,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314
8729,0.784314,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314
8730,0.784314,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314


In [None]:
# Extract the class IDs as a single-column DataFrame
label = soundCSV["classID"].to_frame()
label

Unnamed: 0,classID
0,3
1,2
2,2
3,2
4,2
...,...
8727,1
8728,1
8729,1
8730,1


In [None]:
# Append the label column to the pixel columns.
# Drop any classID column left over from a previous run of this cell first;
# otherwise re-running adds a second classID column and data["classID"]
# stops being a single Series.
data = pd.concat(
    [data.drop(columns=["classID"], errors="ignore"), label],
    axis=1,
)
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16375,16376,16377,16378,16379,16380,16381,16382,16383,classID
0,0.784314,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,3
1,0.784314,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,2
2,0.784314,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,2
3,0.784314,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,2
4,0.784314,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8727,0.784314,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,1
8728,0.784314,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,1
8729,0.784314,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,1
8730,0.784314,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,0.784314,1


In [None]:
# Separate the feature columns from the target column
X_data = data.drop(columns=["classID"])
y_data = data["classID"]

print(f"X_data: {X_data.shape}")
print(f"y_data: {y_data.shape}")

X_data: (8732, 16384)
y_data: (8732,)


In [None]:
# Hold out 20% of the samples as the test set
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_data, y_data, test_size=0.2, random_state=128
)

# Report the shape of each resulting split
for split_name, split in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    print(f"{split_name}:", split.shape)

X_train: (6985, 16384)
X_test: (1747, 16384)
y_train: (6985,)
y_test: (1747,)


In [None]:
# Carve a validation set (15%) out of the training set.
# This cell overwrites X_train/y_train, so re-run the train/test split cell
# before re-running this one, or the training set shrinks again.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train,
                                                  test_size=0.15,
                                                  random_state=128)
print("X_train:", X_train.shape)
# Report the validation shapes (previously the test shapes were printed here).
print("X_val:", X_val.shape)
print("y_train:", y_train.shape)
print("y_val:", y_val.shape)

X_train: (5937, 16384)
X_val: (1747, 16384)
y_train: (5937,)
y_val: (1747,)


In [None]:
# Summarize the shapes of the train, validation and test splits
split_summary = (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_val", X_val),
    ("y_val", y_val),
    ("X_test", X_test),
    ("y_test", y_test),
)
for split_name, split in split_summary:
    print(f"{split_name}:", split.shape)

X_train: (5937, 16384)
y_train: (5937,)
X_val: (1048, 16384)
y_val: (1048,)
X_test: (1747, 16384)
y_test: (1747,)


In [None]:
# Write every split to its own CSV file, without the row index
csv_outputs = {
    'X_train.csv': X_train,
    'y_train.csv': y_train,
    'X_val.csv': X_val,
    'y_val.csv': y_val,
    'X_test.csv': X_test,
    'y_test.csv': y_test,
}
for csv_name, split in csv_outputs.items():
    split.to_csv(csv_name, index=False)

In [None]:
# Save the full table (pixel columns + classID) as a single CSV file.
# Unlike the per-split files, this one keeps the row index as its first column.
data.to_csv("UrbanSound.csv", index=True)