In [None]:
from keras.callbacks import Callback
import keras.backend as K
import numpy as np
from keras_preprocessing.image import ImageDataGenerator
import librosa
import librosa.display
import os
import matplotlib.pyplot as plt
from matplotlib import figure
from PIL import Image

In [None]:
# Create the folders that will receive the spectrogram images.
# exist_ok=True makes every call independent: with one try/except wrapped
# around all four calls, a single pre-existing folder aborted the creation
# of every folder listed after it.
for split in ("train", "test"):
    for label in ("cat", "dog"):
        os.makedirs("image/" + split + "/" + label, exist_ok=True)

In [None]:
#Function for creating the spectrogram of the audio from train dataset
def create_spectrogram(filename,name,folder):
    """Save the mel spectrogram of one training clip as an axis-free JPEG.

    Reads ``cats_dogs/train/<folder>/<filename>`` and writes
    ``image/train/<folder>/<name>.jpg``.

    Parameters
    ----------
    filename : str
        Audio file name inside the class folder.
    name : str
        Base name (without extension) of the image to write.
    folder : str
        Class sub-folder, used for both the input and the output path.
    """
    plt.interactive(False)
    # sr=None keeps the clip's native sample rate instead of resampling.
    clip, sample_rate = librosa.load('cats_dogs/train/'+folder+'/'+filename, sr=None)
    fig = plt.figure(figsize=[0.72,0.72])
    try:
        ax = fig.add_subplot(111)
        # Hide axes and frame so only the spectrogram pixels end up in the image.
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        ax.set_frame_on(False)
        S = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
        # Draw on the explicit axes rather than relying on pyplot's current axes.
        librosa.display.specshow(librosa.power_to_db(S, ref=np.max), ax=ax)
        fig.savefig('image/train/'+ folder +'/' + name + '.jpg',
                    dpi=400, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even when rendering or saving fails;
        # this function is called once per file in a long loop. Only this
        # figure is closed so unrelated open figures are left alone.
        plt.close(fig)

In [None]:
#Function for creating the spectrogram of the audio from test dataset
def create_spectrogram_test(filename,name,folder):
    """Save the mel spectrogram of one test clip as an axis-free JPEG.

    Reads ``cats_dogs/test/<folder>/<filename>`` and writes
    ``image/test/<folder>/<name>.jpg``.

    Parameters
    ----------
    filename : str
        Audio file name inside the class folder.
    name : str
        Base name (without extension) of the image to write.
    folder : str
        Class sub-folder, used for both the input and the output path.
    """
    plt.interactive(False)
    # sr=None keeps the clip's native sample rate instead of resampling.
    clip, sample_rate = librosa.load('cats_dogs/test/'+folder+'/'+filename, sr=None)
    fig = plt.figure(figsize=[0.72,0.72])
    try:
        ax = fig.add_subplot(111)
        # Hide axes and frame so only the spectrogram pixels end up in the image.
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        ax.set_frame_on(False)
        S = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
        # Draw on the explicit axes rather than relying on pyplot's current axes.
        librosa.display.specshow(librosa.power_to_db(S, ref=np.max), ax=ax)
        fig.savefig('image/test/'+ folder +'/' + name + '.jpg',
                    dpi=400, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even when rendering or saving fails.
        # Only this figure is closed so unrelated open figures are left alone.
        plt.close(fig)

In [None]:
#Creating the spectrogram of the train split

a=0
for subdir in os.listdir('cats_dogs/train'):
  class_dir = os.path.join('cats_dogs/train', subdir)
  if not os.path.isdir(class_dir):
    # A stray file next to the class folders would make os.listdir raise.
    continue
  for filename in os.listdir(class_dir):
    # splitext removes only the final extension. The previous split('.')[0]
    # cut the name at the first dot, so "a.1.wav" and "a.2.wav" were both
    # written to "a.jpg" and overwrote each other.
    name = os.path.splitext(filename)[0]
    create_spectrogram(filename,name,subdir)
    print("Image",a,'created')
    a=a+1

In [None]:

#Renaming the test folders
# Each rename gets its own try block: when both shared one, an already
# renamed first folder raised FileNotFoundError and silently skipped the
# second rename as well.
# NOTE(review): the 'test' sub-folder is presumably the dog clips - verify against the dataset.
for old_name, new_name in (('cats', 'cat'), ('test', 'dog')):
    try:
        os.rename('cats_dogs/test/' + old_name, 'cats_dogs/test/' + new_name)
    except FileNotFoundError:
        # directory name already changed
        pass
a=0
for subdir in os.listdir('cats_dogs/test'):
  class_dir = os.path.join('cats_dogs/test', subdir)
  if not os.path.isdir(class_dir):
    # A stray file next to the class folders would make os.listdir raise.
    continue
  for filename in os.listdir(class_dir):
    # splitext removes only the final extension; split('.')[0] cut the name
    # at the first dot and could map several clips to the same image file.
    name = os.path.splitext(filename)[0]
    create_spectrogram_test(filename,name,subdir)
    print("Image",a,'created')
    a=a+1

In [None]:
# Both splits are only rescaled to [0, 1]; no augmentation is configured.
training_datagen = ImageDataGenerator(rescale=1./255.)
validation_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by the train and validation flows.
flow_kwargs = dict(
    class_mode='categorical',
    shuffle=True,
    seed=37,
    target_size=(128,128),
    batch_size=32,
)

train_generator = training_datagen.flow_from_directory(
    directory='image/train', **flow_kwargs
)
validation_generator = validation_datagen.flow_from_directory(
    directory='image/test', **flow_kwargs
)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Flatten,Dropout,Conv2D, MaxPooling2D,Activation

# Two convolutional stages (conv-relu-conv-relu-pool-dropout) followed by a
# dense classifier with a 2-way softmax output.
model = Sequential([
    # Stage 1: 32 and 64 filters on 128x128 RGB input.
    Conv2D(32, (3, 3), padding='same', input_shape=(128,128,3)),
    Activation('relu'),
    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    # Stage 2: two 128-filter convolutions.
    Conv2D(128, (3, 3), padding='same'),
    Activation('relu'),
    Conv2D(128, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.5),
    # Classifier head.
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dropout(0.5),
    Dense(2, activation='softmax'),
])

In [None]:
# `optimizers` was used below without ever being imported (NameError).
from tensorflow.keras import optimizers

#Callback function to stop training when validation accuracy reaches 95%
class myCallback(Callback):
  """Stop training once the validation accuracy exceeds 0.95."""

  def on_epoch_end(self, epoch, logs=None):
    """Check the epoch's validation accuracy and request a stop above 0.95.

    `logs` is the metrics dict passed in by Keras; it may be None.
    """
    logs = logs or {}
    # The metric is reported as 'val_acc' by older Keras releases and as
    # 'val_accuracy' by newer ones; accept either so the comparison never
    # receives None.
    val_acc = logs.get('val_acc', logs.get('val_accuracy'))
    if val_acc is not None and val_acc > 0.95:
      print("\nReached 95% accuracy so cancelling training!")
      self.model.stop_training = True

# `learning_rate` is the documented keyword; `lr` is a deprecated alias.
# NOTE(review): `decay` is kept to preserve the training schedule, but newer
# optimizer implementations may reject it - confirm against the installed version.
model.compile(optimizer=optimizers.Adam(learning_rate=1e-4, decay=1e-6),
              loss="categorical_crossentropy",
              metrics=["accuracy"])

In [None]:
# Number of whole batches per epoch. Floor division yields 0 when a split
# holds fewer samples than one batch, so clamp to at least one step.
STEP_SIZE_TRAIN=max(1, train_generator.n//train_generator.batch_size)
STEP_SIZE_VALID=max(1, validation_generator.n//validation_generator.batch_size)
callbacks = myCallback()

# NOTE(review): fit_generator is deprecated in recent TensorFlow releases in
# favour of model.fit, which accepts generators directly - confirm the
# installed version before switching.
History=model.fit_generator(generator=train_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_data=validation_generator,
                    validation_steps=STEP_SIZE_VALID,
                    epochs=50,
                    callbacks=[callbacks]
)

In [None]:
#Function for creating spectrogram for the input audio to be predicted
def create_spectrogram_pred(filename,name):
    """Save the mel spectrogram of an audio file as ``<name>.jpg``.

    The image is written relative to the current working directory.

    Parameters
    ----------
    filename : str
        Path of the audio file to load.
    name : str
        Base name (without extension) of the image to write.
    """
    plt.interactive(False)
    # sr=None keeps the clip's native sample rate instead of resampling.
    clip, sample_rate = librosa.load(filename, sr=None)
    fig = plt.figure(figsize=[0.72,0.72])
    try:
        ax = fig.add_subplot(111)
        # Hide axes and frame so only the spectrogram pixels end up in the image.
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        ax.set_frame_on(False)
        S = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
        # Draw on the explicit axes rather than relying on pyplot's current axes.
        librosa.display.specshow(librosa.power_to_db(S, ref=np.max), ax=ax)
        fig.savefig(name + '.jpg', dpi=400, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even when rendering or saving fails.
        # Locals are freed on return, so no explicit `del` is needed.
        plt.close(fig)

In [None]:
#Function for predicting cat or dog of the input audio
def pred_sound(filename):
  """Classify an audio file as cat or dog and print the verdict.

  A spectrogram image ``<name>.jpg`` is written to the working directory,
  where ``<name>`` is the file's base name without its extension, and is
  then fed to the global ``model``.

  Parameters
  ----------
  filename : str
      Path of the audio file to classify.
  """
  # basename + splitext keep dots inside the file name; split('.')[0]
  # truncated the name at the first dot.
  name = os.path.splitext(os.path.basename(filename))[0]
  create_spectrogram_pred(filename,name)
  with Image.open(name+'.jpg') as raw_img:
    # Mirror the training pipeline: 3-channel input, 128x128 target size and
    # nearest-neighbour resizing (the default of flow_from_directory).
    img = raw_img.convert('RGB').resize((128,128), Image.NEAREST)
  # The generators rescale pixels by 1/255; feeding raw 0-255 values gave the
  # network inputs far outside the range it was trained on.
  pixels = np.asarray(img, dtype=np.float32) / 255.
  sample = np.expand_dims(pixels, axis=0)
  y_hat=model.predict(sample)
  # flow_from_directory orders classes alphanumerically: 'cat' -> 0, 'dog' -> 1.
  if np.argmax(y_hat[0])==0:
    print("The audio is by cat")
  else:
    print("The audio is by dog")


In [None]:
# Classify a sample recording; 'bark.wav' is resolved relative to the working directory.
pred_sound('bark.wav')