In [1]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
from tensorflow.keras import models,layers
import librosa
import pickle
import os
import json
from tqdm import tqdm
import cv2
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, Concatenate, Flatten, MaxPooling2D, Conv2D
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

In [12]:
path='C:/Users/Yuvraj/Downloads/birdclef-2022/birds2/'

In [3]:
def openAudioFile(path, sample_rate=44100, as_mono=True, mean_substract=False):
    
    sig, rate = librosa.load(path, sr=sample_rate, mono=as_mono)
    if mean_substract:
        sig -= sig.mean()
    return sig, rate

In [37]:
def melspec(sig, rate=44100, shape=(224, 224), fmin=20, fmax=20000, normalize=False):

    
    SAMPLE_RATE = rate  
    FMAX = fmax
    FMIN = fmin

   
    # Librosa mel-spectrum
    melspec = librosa.feature.melspectrogram(y=sig, sr=SAMPLE_RATE, n_mels=224, fmax=FMAX, fmin=FMIN)
    
    # Convert power spec to dB scale (compute dB relative to peak power)
    melspec = librosa.amplitude_to_db(melspec)

    # Normalize values between 0 and 1
    if normalize:
        melspec -= melspec.min()
        if not melspec.max() == 0:
            melspec /= melspec.max()
        else:
            mlspec = np.clip(melspec, 0, 1)

    return melspec.astype('float32')


In [6]:
def splitSignal(sig, rate, seconds, minlen):

    
    sig_splits = []
    for i in range(0, len(sig), int((seconds) * rate)):
        split = sig[i:i + int(seconds * rate)]
        
        if len(split) < int(minlen * rate):
            break
        
        
        if len(split) < int(rate * seconds):
            split = np.hstack((split, np.zeros((int(rate * seconds) - len(split),))))
        
        sig_splits.append(split)

    return sig_splits

In [7]:
with open(r'C:\Users\Yuvraj\Downloads\birdclef-2022\scored_birds.json') as sbfile:
    scored_birds = json.load(sbfile)

In [8]:
scored_birds

['akiapo',
 'aniani',
 'apapan',
 'barpet',
 'crehon',
 'elepai',
 'ercfra',
 'hawama',
 'hawcre',
 'hawgoo',
 'hawhaw',
 'hawpet1',
 'houfin',
 'iiwi',
 'jabwar',
 'maupar',
 'omao',
 'puaioh',
 'skylar',
 'warwhe1',
 'yefcan']

In [44]:
def mono_to_color(X, eps=1e-6, mean=None, std=None):
   
    X = np.stack([X, X, X], axis=-1)

    # Standardize
    mean = mean or X.mean()
    std = std or X.std()
    X = (X - mean) / (std + eps)

    # Normalize to [0, 255]
    _min, _max = X.min(), X.max()

    if (_max - _min) > eps:
        V = np.clip(X, _min, _max)
        V = 255 * (V - _min) / (_max - _min)
        V = V.astype(np.uint8)
    else:
        V = np.zeros_like(X, dtype=np.uint8)

    V=cv2.resize(V,(224,224))
    return V

In [63]:
train_audio_dir = r'C:\Users\Yuvraj\Downloads\birdclef-2022\train_audio'
if not os.path.exists(path):
    os.mkdir(path)

for dir in tqdm(os.listdir(train_audio_dir)):
    if dir in scored_birds:
        if not os.path.exists(path+dir):
            os.mkdir(path+dir)
        count=1
        for file in os.listdir(train_audio_dir+'/'+dir):
            path__=train_audio_dir+"/"+dir+"/"+file
            audio_data,audio_sample_rate=openAudioFile(path__)
            si=splitSignal(audio_data,audio_sample_rate,5,5)
            for sig in si:
                path__=os.path.join(path+dir,str(count)+'.jpg')
                m_spec=melspec(sig)
                cv2.imwrite(path__,mono_to_color(m_spec))
                count=count+1

100%|████████████████████████████████████████████████████████████████████████████████| 152/152 [52:33<00:00, 20.75s/it]


In [90]:
pre_model=tf.keras.applications.mobilenet.MobileNet(
    input_shape=(224,224,3),
    include_top=False,
    weights='imagenet')

In [91]:
pre_model.trainable=False

In [92]:
model=models.Sequential()
model.add(pre_model)
model.add(MaxPooling2D(name="MaxPool_"))
model.add(Dropout(0.3, name="dropout_out"))
model.add(Flatten())
model.add(Dense(21, activation='softmax'))

In [93]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenet_1.00_224 (Function (None, 7, 7, 1024)        3228864   
_________________________________________________________________
MaxPool_ (MaxPooling2D)      (None, 3, 3, 1024)        0         
_________________________________________________________________
dropout_out (Dropout)        (None, 3, 3, 1024)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 9216)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 21)                193557    
Total params: 3,422,421
Trainable params: 193,557
Non-trainable params: 3,228,864
_________________________________________________________________


In [94]:
model.compile(loss='categorical_crossentropy',
                  optimizer='Adam',
                  metrics=['accuracy'])

In [68]:
a=0
for x in os.listdir(path):
    a=max(a,len(os.listdir(path+x)))
    print(len(os.listdir(path+x)))
print(a)

145
83
493
195
23
131
11
140
484
41
52
32
3260
418
803
49
224
7
5296
500
427
5296


In [95]:
images=[] #for images of all classes

for dir in os.listdir(path):
    dir_images=[os.path.join(path,dir,im_path) for im_path in os.listdir(path+dir)]
    np.random.shuffle(dir_images)
    
    count=len(dir_images)
    #max 1000 samples of each class
    if count>1000:
        images=images+dir_images[:1000]
    else:
        images=images+dir_images

    #min images should be more than or equal to 500 other wise add duplicate images of same class
    while count<500:
        images=images+[dir_images[np.random.randint(len(dir_images))]]
        count+=1

for i in range(len(images)):
    path_=images[i]
    class_=path_[46:]
    class_=class_.split('\\')[0]
    images[i]=(path_,class_)
    
np.random.shuffle(images)


vsplit = int(len(images) * .20)
train = images[:-vsplit]
val = images[-vsplit:]


In [70]:
len(images)

11803

In [71]:
s='warwhe1\\62.jpg'
s.split('\\')[0]

'warwhe1'

In [96]:
import math
class batchGenerator(tf.keras.utils.Sequence):

    def __init__(self, x_set, batch_size):
        self.x = x_set
        self.batch_size = batch_size

    def __len__(self):
        return math.ceil(len(self.x) / self.batch_size)

    def __getitem__(self, idx):
        batch_x = []
        batch_y = []
        for item in self.x[idx * self.batch_size:(idx + 1) * self.batch_size]:
            path,class_=item
            frame=cv2.imread(path)
            #frame=cv2.resize(frame,(64,64))   
            img=(np.asarray(frame)/255.0)    
            batch_x.append(img)
            y=[0]*21
            y[scored_birds.index(class_)]=1
            y=np.array(y)
            batch_y.append(y)

        batch_x = np.asarray(batch_x)
        batch_x = batch_x.astype(np.float32)
        return np.array(batch_x),np.asarray(batch_y)

In [76]:
obj=batchGenerator(train,32)

In [77]:
obj.__getitem__(0)

(array([[[[0.7019608 , 0.7019608 , 0.7019608 ],
          [0.6784314 , 0.6784314 , 0.6784314 ],
          [0.75686276, 0.75686276, 0.75686276],
          ...,
          [0.7137255 , 0.7137255 , 0.7137255 ],
          [0.69803923, 0.69803923, 0.69803923],
          [0.64705884, 0.64705884, 0.64705884]],
 
         [[0.67058825, 0.67058825, 0.67058825],
          [0.6392157 , 0.6392157 , 0.6392157 ],
          [0.7372549 , 0.7372549 , 0.7372549 ],
          ...,
          [0.70980394, 0.70980394, 0.70980394],
          [0.6901961 , 0.6901961 , 0.6901961 ],
          [0.60784316, 0.60784316, 0.60784316]],
 
         [[0.6392157 , 0.6392157 , 0.6392157 ],
          [0.5764706 , 0.5764706 , 0.5764706 ],
          [0.72156864, 0.72156864, 0.72156864],
          ...,
          [0.6745098 , 0.6745098 , 0.6745098 ],
          [0.6509804 , 0.6509804 , 0.6509804 ],
          [0.54509807, 0.54509807, 0.54509807]],
 
         ...,
 
         [[0.21568628, 0.21568628, 0.21568628],
          [0.     

In [97]:
len(train)/100

95.33

In [97]:
history=model.fit(batchGenerator(train,32),epochs=10,validation_data=batchGenerator(val,32),batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [98]:
model.save('audio_model2.h5')

In [99]:
im=cv2.imread('C:/Users/Yuvraj/Downloads/birdclef-2022/birds2/houfin\\10.jpg')
im=im/255.0
model.predict(np.array([im,]))

array([[1.4915972e-24, 3.3183236e-20, 1.2387266e-15, 1.9406895e-30,
        0.0000000e+00, 1.0404151e-13, 8.6370153e-33, 7.2327162e-29,
        1.0954826e-27, 1.4292243e-29, 3.7894574e-27, 4.9303665e-36,
        8.9203757e-01, 9.6247316e-13, 6.9617740e-12, 1.9943220e-28,
        6.2537410e-28, 0.0000000e+00, 9.0626429e-10, 2.0301230e-12,
        1.0796244e-01]], dtype=float32)

In [84]:
scored_birds

['akiapo',
 'aniani',
 'apapan',
 'barpet',
 'crehon',
 'elepai',
 'ercfra',
 'hawama',
 'hawcre',
 'hawgoo',
 'hawhaw',
 'hawpet1',
 'houfin',
 'iiwi',
 'jabwar',
 'maupar',
 'omao',
 'puaioh',
 'skylar',
 'warwhe1',
 'yefcan']