In [1]:
import tensorflow as tf
print("TensorFlow version:", tf.__version__)
import tensorflow.keras as tfk
from tensorflow.keras.layers import Dense, Flatten, Conv1D, Embedding, Normalization, Conv1DTranspose,InputLayer
import tensorflow.keras.layers as tfkl
from tensorflow.keras import Model
import math
import time
import tensorflow as tf
import tensorflow_probability as tfp
import librosa
import json
import pandas as pd
import numpy as np
from tqdm import tqdm
import os
import matplotlib.pyplot as plt
import tensorflow_io as tfio
import random
import tensorflow as tf
from pathlib import Path
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

In [2]:
# Root of the competition data set.
file_path = '../input/birdclef-2022'

# The list of species that are actually scored in the competition.
with open('../input/birdclef-2022/scored_birds.json', 'r') as sb:
    s_b = json.load(sb)

# Keep only the recordings whose primary label is one of the scored species.
train_df = pd.read_csv('../input/birdclef-2022/train_metadata.csv')
is_scored = train_df['primary_label'].isin(s_b)
train_df = train_df[is_scored]

# Class vocabulary: position in this array is the integer class id used later.
bird_label = train_df["primary_label"].unique()

In [3]:
# Load the submission template and the test manifest; both must describe the
# same number of rows, otherwise the submission cannot line up.
submission_df = pd.read_csv('../input/birdclef-2022/sample_submission.csv')
test_df = pd.read_csv('../input/birdclef-2022/test.csv')

n_test_rows, n_submission_rows = test_df.shape[0], submission_df.shape[0]
if n_test_rows != n_submission_rows:
    raise ValueError('test submission row number didnt match')

In [4]:
train_path = '../input/birdclef-2022/train_audio'

def preprocessing(df, path, bird_label):
    """Turn every training recording into a list of (MFCC vector, label index) pairs.

    For each label, every audio file under ``path/<label>`` is loaded at 32 kHz,
    min-max normalised to [-1, 1], stripped of silence, padded by repetition to
    at least 5 s, cut into 5 s frames with 50% overlap and converted to MFCCs.

    Args:
        df: Unused; kept so existing callers keep working.
        path: Directory that contains one sub-directory of audio per label.
        bird_label: Array of label names. It is compared element-wise with
            ``==``, so it is assumed to be a NumPy array (e.g. the result of
            ``Series.unique()``).

    Returns:
        A list of ``(mfcc, yi)`` tuples, one per frame, where ``mfcc`` is one
        row of the MFCC tensor and ``yi`` is the raw ``np.where`` result that
        locates the label inside ``bird_label``.
    """
    le = 160000                # frame length in samples (5 s at 32 kHz)
    step = le // 2             # 50% overlap between consecutive frames
    sample_rate = 32000
    fft_length = 4096
    lower_edge_hertz, upper_edge_hertz, num_mel_bins = 1000.0, 8000.0, 4096

    # The mel filter bank only depends on constants, so build it once instead
    # of once per file. A real FFT of size fft_length has fft_length//2 + 1 bins.
    num_spectrogram_bins = fft_length // 2 + 1
    linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins, num_spectrogram_bins, sample_rate, lower_edge_hertz,
        upper_edge_hertz)

    train = []
    for label in tqdm(bird_label):
        # The label index is the same for every file of this label.
        yi = np.where(bird_label == label)
        files = librosa.util.find_files(os.path.join(path, label))
        for f in tqdm(files):
            y, sr = librosa.load(f, sr=sample_rate)
            if len(y) == 0:
                continue

            # Min-max normalise to [-1, 1]. A constant (or NaN) signal has no
            # usable range and would produce NaNs, so skip it.
            lowest, highest = np.amin(y), np.amax(y)
            if not highest > lowest:
                continue
            y = ((y - lowest) * 2) / (highest - lowest) - 1

            # librosa returns half-open [start, end) sample intervals.
            intervals = librosa.effects.split(y, top_db=15, ref=np.max)
            segments = [y[p:q] for p, q in intervals]
            if not segments:
                continue
            y = np.concatenate(segments).astype('float32')
            if len(y) == 0:
                # Nothing left to repeat; the padding loop below would never end.
                continue

            # Pad short clips by repeating them until one full frame fits.
            if len(y) < le:
                while len(y) < le:
                    y = np.concatenate((y, y))
                y = y[:le]

            # STFT over 5 s frames with 50% overlap and a 4096-point FFT.
            # NOTE(review): fft_length is smaller than frame_length, and the
            # TensorFlow RFFT docs say the input is cropped in that case, so
            # presumably only the first 4096 windowed samples of each frame
            # are transformed - confirm this is intended.
            stfts = tf.signal.stft(y, frame_length=le, frame_step=step,
                                   fft_length=fft_length)
            spectrograms = tf.abs(stfts)

            # Warp the linear scale spectrograms into the mel-scale.
            mel_spectrograms = tf.tensordot(
                spectrograms, linear_to_mel_weight_matrix, 1)
            mel_spectrograms.set_shape(spectrograms.shape[:-1].concatenate(
                linear_to_mel_weight_matrix.shape[-1:]))

            # Compute a stabilized log to get log-magnitude mel-scale spectrograms.
            log_mel_spectrograms = tf.math.log(mel_spectrograms + 1e-6)

            mfccs = tf.signal.mfccs_from_log_mel_spectrograms(
                log_mel_spectrograms)

            # One training example per frame.
            for mfc in mfccs:
                train.append((mfc, yi))
    return train

In [5]:
# Build the full list of (MFCC frame, label index) training examples.
train_data = preprocessing(
    df=train_df,
    path=train_path,
    bird_label=bird_label,
)

In [6]:
class TensorflowDataGenerator():
    """Serve (features, label) examples from an in-memory list in batches.

    Each element of ``mel_list`` is a ``(features, label)`` pair. Batches are
    addressed by their index, so the object can be iterated any number of
    times (Python falls back to ``__getitem__`` with 0, 1, 2, ... until
    ``IndexError``) and individual batches can be fetched in any order.
    """
    def __init__(self, mel_list, batch_size):
        """Store the examples and the batch size.

        Raises:
            ValueError: if ``batch_size`` is not positive.
        """
        if batch_size <= 0:
            raise ValueError('batch_size must be a positive integer')
        self.mel_list = mel_list
        self.batch_size = batch_size
        # Kept for backward compatibility: position just past the last served batch.
        self.index_helper = 0
        self.le = len(mel_list)

    def __len__(self):
        """Number of batches, counting a final partial batch."""
        return math.ceil(self.le / self.batch_size)

    def __getitem__(self, index):
        """Return batch number ``index`` as two float32 arrays.

        Features get a leading axis of size 1 added; labels are squeezed to
        scalars. The last batch may be smaller than ``batch_size``.

        Raises:
            IndexError: if ``index`` is outside ``[-len(self), len(self))``.
        """
        if index < 0:
            index += len(self)
        start = index * self.batch_size
        if index < 0 or start >= self.le:
            raise IndexError('batch index out of range')
        stop = min(start + self.batch_size, self.le)

        x, y = [], []
        for features, label in self.mel_list[start:stop]:
            x.append(np.array(features)[np.newaxis])
            y.append(np.array(label).squeeze())
        self.index_helper = stop

        return np.array(x).astype('float32'), np.array(y).astype('float32')

    def reset(self):
        """Rewind the bookkeeping cursor; batches are index-based, so this is optional."""
        self.index_helper = 0
        

In [7]:
# Deterministic in-place shuffle of the frame-level examples.
# NOTE(review): the shuffle happens after recordings were cut into overlapping
# frames, so frames of one recording can end up in both splits - confirm that
# this is acceptable for the validation score.
random.seed(2022)
random.shuffle(train_data)

# 90% of the examples for training, the remaining 10% for validation.
split_index = int(0.9*len(train_data))
training_data, val_data = train_data[:split_index], train_data[split_index:]

batch_size = 32

train_set = TensorflowDataGenerator(training_data, batch_size)
val_set = TensorflowDataGenerator(val_data, batch_size)

In [8]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv1D, BatchNormalization, Dense, Dropout
from tensorflow.keras.layers import AvgPool1D, GlobalAveragePooling1D, MaxPool1D, Conv1DTranspose
from tensorflow.keras.models import Model
from tensorflow.keras.layers import ReLU, concatenate
import tensorflow.keras.backend as K
# Creating a 1-D DenseNet; the dense-block sizes [6, 12, 32, 32] used below match DenseNet-169, not DenseNet-121
tf.random.set_seed(2022)
def densenet(input_shape, n_classes, filters = 32, repetitions = (6, 12, 32, 32)):
    """Build a 1-D DenseNet-style classifier.

    The network is a convolutional stem (three causal dilated convolutions, a
    strided convolution, three transposed convolutions and a max-pool),
    followed by dense blocks separated by transition layers, global average
    pooling, a regularised 2048-unit layer and a softmax output.

    Args:
        input_shape: Shape of one example, without the batch axis.
        n_classes: Number of output classes.
        filters: Growth rate, i.e. channels added by every dense-block step.
        repetitions: Number of steps in each dense block. The default is the
            configuration this notebook has always used.

    Returns:
        An uncompiled ``Model`` mapping the input to class probabilities.
    """
    def bn_rl_conv(x, filters, kernel=1, strides=1):
        """Batch norm -> ReLU -> convolution -> light dropout."""
        x = BatchNormalization()(x)
        x = ReLU()(x)
        x = Conv1D(filters, kernel, strides=strides, padding='same')(x)
        x = Dropout(0.1)(x)
        return x

    def dense_block(x, repetition):
        """Stack ``repetition`` bottleneck steps, concatenating each output to its input."""
        for _ in range(repetition):
            y = bn_rl_conv(x, 4*filters)
            y = bn_rl_conv(y, filters, 3)
            x = concatenate([y, x])
        return x

    def transition_layer(x):
        """Halve the channel count with a 1x1 convolution, then average-pool by 2."""
        x = bn_rl_conv(x, K.int_shape(x)[-1] // 2)
        x = AvgPool1D(2, strides=2, padding='same')(x)
        return x

    # Named `inputs` so the builtin `input` is not shadowed.
    inputs = Input(input_shape)
    x = Conv1D(64, 3, strides=1, padding='causal', dilation_rate=2, activation='relu')(inputs)
    x = BatchNormalization()(x)
    x = Conv1D(64, 3, strides=1, padding='causal', dilation_rate=4, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Conv1D(64, 3, strides=1, padding='causal', dilation_rate=8, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Conv1D(64, 7, strides=2, padding='same')(x)
    x = Conv1DTranspose(32, 3, strides=1, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Conv1DTranspose(64, 3, strides=1, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Conv1DTranspose(128, 3, strides=1, activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPool1D(3, strides=2, padding='same')(x)

    # Transition layers sit between dense blocks only. The classifier head
    # reads the output of the last dense block directly, so no transition is
    # built after it (it would never be part of the model).
    last_block = len(repetitions) - 1
    for block_index, repetition in enumerate(repetitions):
        x = dense_block(x, repetition)
        if block_index < last_block:
            x = transition_layer(x)

    x = GlobalAveragePooling1D()(x)
    x = Dense(2048, activation='relu',
              kernel_regularizer=tf.keras.regularizers.L1(0.01),
              activity_regularizer=tf.keras.regularizers.L2(0.01))(x)
    x = Dropout(0.25)(x)
    output = Dense(n_classes, activation='softmax')(x)
    model = Model(inputs, output)
    return model
# One example is a single time step carrying 4096 MFCC coefficients.
input_shape = (1, 4096)
n_classes = 21
# Dense-block sizes are left at the builder's setting: [6, 12, 32, 32].
model = densenet(input_shape=input_shape, n_classes=n_classes)

In [9]:
# Optimisation set-up. Plain SGD is used here; an earlier experiment noted
# Adadelta with learning_rate=1e-4.
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)
epoches = 10

# The model already ends in a softmax, hence from_logits=False.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)

# Running means of per-batch accuracy and loss, for training and validation.
train_acc, train_loss, val_acc, val_loss = (
    tf.keras.metrics.Mean() for _ in range(4)
)

In [10]:
@tf.function
def train_step(x_batch, y_batch):
    """Run one optimisation step on a batch and update the training metrics.

    Args:
        x_batch: Batch of model inputs.
        y_batch: Batch of integer class ids stored as float32.
    """
    # Only the forward pass and the loss need to be recorded by the tape.
    with tf.GradientTape() as tape:
        logits = model(x_batch, training=True)
        loss_value = loss_fn(y_batch, logits)
        # NOTE(review): losses collected in `model.losses` (from layer
        # regularizers) are not added here, so they do not influence
        # training - confirm whether that is intended.

    # Differentiate and apply the update outside the tape context so these
    # operations are not recorded as well.
    grads = tape.gradient(loss_value, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

    # Per-example hit/miss; the Mean metric averages it into an accuracy.
    acc_value = tf.math.equal(y_batch, tf.cast(tf.math.argmax(logits, 1), dtype=tf.float32))
    train_acc.update_state(acc_value)
    train_loss.update_state(loss_value)
    
def val_step(x_batch_val, y_batch_val):
    """Evaluate one validation batch and fold the result into the validation metrics."""
    # Inference mode: dropout off, batch norm uses its moving statistics.
    probabilities = model(x_batch_val, training=False)
    batch_loss = loss_fn(y_batch_val, probabilities)

    # Labels are float32, so the predicted class ids are cast before comparing.
    predicted_ids = tf.cast(tf.argmax(probabilities, axis=1), dtype=tf.float32)
    hits = tf.equal(y_batch_val, predicted_ids)

    val_acc.update_state(hits)
    val_loss.update_state(batch_loss)

In [11]:
# Learning rate to switch to at the start of the given epochs.
lr_overrides = {2: 1e-6, 3: 1e-5, 5: 1e-6}

for epoch in range(epoches):
    if epoch in lr_overrides:
        optimizer.lr.assign(lr_overrides[epoch])

    start_time = time.time()

    # Rewind both generators before each pass over the data.
    train_set.reset()
    val_set.reset()

    for x_batch, y_batch in tqdm(train_set):
        train_step(x_batch, y_batch)

    for x_batch_val, y_batch_val in tqdm(val_set):
        val_step(x_batch_val, y_batch_val)

    end_time = time.time()
    print(f'Epoch: {epoch} \tTraining Loss: {train_loss.result()} \tValidation Loss: {val_loss.result()} \tTraining Accuracy: {train_acc.result()} \tValidation Accuracy: {val_acc.result()} \tTime taken: {end_time - start_time}')

    # Start every epoch with fresh running means.
    for metric in (train_acc, train_loss, val_acc, val_loss):
        metric.reset_states()

In [12]:
@tf.function
def test_step(x_batch_val):
    """Return the index of the highest-scoring class for each example in the batch."""
    class_scores = model(x_batch_val, training=False)
    return tf.argmax(class_scores, axis=1)

In [13]:
test_path = '../input/birdclef-2022/test_soundscapes/'
# Sorted so the processing order does not depend on the file system.
test_files = sorted(os.listdir(test_path))
def preprocessing_test_dat(test_path, files):
    """Cut every test soundscape into 5 s segments and compute MFCCs for each.

    Args:
        test_path: Directory that holds the soundscape files.
        files: File names (not full paths) inside ``test_path``.

    Returns:
        A list of ``(row_id, mfccs)`` tuples, one per segment. ``row_id`` is
        ``<file name without extension>_<segment end time in seconds>``.
    """
    le = 160000                # segment length in samples (5 s at 32 kHz)
    sample_rate = 32000
    fft_length = 4096
    lower_edge_hertz, upper_edge_hertz, num_mel_bins = 1000.0, 8000.0, 4096

    # The mel filter bank only depends on constants, so build it once instead
    # of once per segment. A real FFT of size fft_length has fft_length//2 + 1 bins.
    num_spectrogram_bins = fft_length // 2 + 1
    linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins, num_spectrogram_bins, sample_rate, lower_edge_hertz,
        upper_edge_hertz)

    # NOTE(review): the training pipeline min-max normalises each recording
    # and removes silence before this point; neither happens here - confirm
    # that the mismatch is intended.
    test = []
    for file in tqdm(files):
        y, sr = librosa.load(os.path.join(test_path, file), sr=sample_rate)
        # Drop the extension whatever its length is.
        file_stem = os.path.splitext(file)[0]
        for segment in range(0, len(y), sample_rate*5):
            row_id = file_stem + '_' + str(int((segment + (sample_rate * 5)) / (sample_rate)))
            if segment+le > len(y):
                # Trailing partial segment: repeat it until it fills 5 s.
                yi = y[segment:]
                while len(yi) < le:
                    yi = np.concatenate((yi, yi))
                yi = yi[:le]
            else:
                yi = y[segment:segment+le]

            # A single frame per segment.
            stfts = tf.signal.stft(yi, frame_length=le, frame_step=le,
                                   fft_length=fft_length)
            spectrograms = tf.abs(stfts)

            # Warp the linear scale spectrograms into the mel-scale.
            mel_spectrograms = tf.tensordot(spectrograms, linear_to_mel_weight_matrix, 1)
            mel_spectrograms.set_shape(spectrograms.shape[:-1].concatenate(
                linear_to_mel_weight_matrix.shape[-1:]))

            # Compute a stabilized log to get log-magnitude mel-scale spectrograms.
            log_mel_spectrograms = tf.math.log(mel_spectrograms + 1e-6)

            mfccs = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrograms)
            test.append((row_id, mfccs))
    return test

In [14]:
class TensorflowDataGenerator_test():
    """Serve (row id, features) examples from an in-memory list in batches.

    Each element of ``mel_list`` is a ``(row_id, features)`` pair. Batches are
    addressed by their index, so the object can be iterated any number of
    times (Python falls back to ``__getitem__`` with 0, 1, 2, ... until
    ``IndexError``) and individual batches can be fetched in any order.
    """
    def __init__(self, mel_list, batch_size):
        """Store the examples and the batch size.

        Raises:
            ValueError: if ``batch_size`` is not positive.
        """
        if batch_size <= 0:
            raise ValueError('batch_size must be a positive integer')
        self.mel_list = mel_list
        self.batch_size = batch_size
        # Kept for backward compatibility: position just past the last served batch.
        self.index_helper = 0
        self.le = len(mel_list)

    def __len__(self):
        """Number of batches, counting a final partial batch."""
        return math.ceil(self.le / self.batch_size)

    def __getitem__(self, index):
        """Return batch number ``index``.

        Returns:
            ``(row_ids, features)``: a list of row ids and a float32 array of
            the matching features. The last batch may be smaller than
            ``batch_size``.

        Raises:
            IndexError: if ``index`` is outside ``[-len(self), len(self))``.
        """
        if index < 0:
            index += len(self)
        start = index * self.batch_size
        if index < 0 or start >= self.le:
            raise IndexError('batch index out of range')
        stop = min(start + self.batch_size, self.le)

        batch = self.mel_list[start:stop]
        x = [row_id for row_id, _ in batch]
        y = [features for _, features in batch]
        self.index_helper = stop
        return x, np.array(y).astype('float32')

    def reset(self):
        """Rewind the bookkeeping cursor; batches are index-based, so this is optional."""
        self.index_helper = 0
        

In [15]:








# Compute (row id, MFCC) pairs for every 5 s segment of every test soundscape.
test_dat = preprocessing_test_dat(
    test_path=test_path,
    files=test_files,
)

In [16]:
# Batch size used for inference.
batch_size = 32

# Batched view over the test segments.
test_set = TensorflowDataGenerator_test(mel_list=test_dat, batch_size=batch_size)

In [17]:
# Build one submission row per (segment, species): the target is True only
# for the species the model ranks highest for that segment.
predictions = []
test_set.reset()
# The test generator yields (row ids, features), so here x_batch holds the
# segment ids and y_batch the model inputs.
for x_batch, y_batch in tqdm(test_set):
    preds = test_step(y_batch)
    for segment_id, pred in zip(x_batch, preds):
        # A segment id is "<file stem>_<end second>". Split on the LAST
        # underscore only, so stems with any number of underscores survive.
        file_stem, end_second = segment_id.rsplit('_', 1)
        predicted_bird = bird_label[int(pred)]
        for bird in bird_label:
            row_id = file_stem + '_' + bird + '_' + end_second
            predictions.append([row_id, bool(bird == predicted_bird)])

In [18]:
# Tabulate the predictions in the submission layout and preview the first rows.
submission_columns = ['row_id', 'target']
sub_df = pd.DataFrame(data=predictions, columns=submission_columns)
sub_df.head()

In [19]:
# Write the submission file without the DataFrame index column.
sub_df.to_csv(path_or_buf='submission.csv', index=False)