In [2]:
import os
import pandas as pd
import tensorflow as tf
import librosa
import tensorflow_io as tfio
import tensorflow_hub as hub
import keras
from keras.models import Sequential
from keras.layers import Input, Dense, BatchNormalization, Dropout
from keras.losses import SparseCategoricalCrossentropy
from keras.optimizers import SGD  
import tensorflow as tf
from tensorflow.keras import layers, models


In [3]:

birds_csv = 'small_birds_dataset\metadata.csv'
base_data_path = 'small_birds_dataset'
pd_data = pd.read_csv(birds_csv)

# Define classes 
my_classes = ['White-breasted Wood-Wren', 'House Sparrow', 'Red Crossbill', 'Chestnut-crowned Antpitta', 'Azara\'s Spinetail']
map_class_to_id = {bird_class: idx for idx, bird_class in enumerate(my_classes)}


filtered_pd = pd_data[pd_data['common_name'].isin(my_classes)]


class_id = filtered_pd['common_name'].apply(lambda name: map_class_to_id[name])
filtered_pd = filtered_pd.assign(target=class_id)


full_path = filtered_pd.apply(lambda row: os.path.join(base_data_path, row['split'], row['primary_label'], row['filename']), axis=1)
filtered_pd = filtered_pd.assign(filename=full_path)


filtered_pd.head(10)

# Define YAMNet model
yamnet_model_handle = "https://tfhub.dev/google/yamnet/1"
yamnet_model = hub.load(yamnet_model_handle)

# Extract embeddings
def extract_embedding(wav_data, label):
    scores, embeddings, spectrogram = yamnet_model(wav_data)
    num_embeddings = tf.shape(embeddings)[0]
    return (embeddings, tf.repeat(label, num_embeddings))

def load_wav_16k_mono(filename):
    try:
        file_contents = tf.io.read_file(filename)
        wav, sample_rate = tf.audio.decode_wav(file_contents, desired_channels=1)
        wav = tf.squeeze(wav, axis=-1)
        sample_rate = tf.cast(sample_rate, dtype=tf.int64)
        wav = tfio.audio.resample(wav, rate_in=sample_rate, rate_out=16000)
        return wav
    except Exception as e:
        print(f"Error loading {filename}: {e}")
        return None
    

def load_wav_for_map(filename, label):
    return load_wav_16k_mono(filename), label

# Create TensorFlow Dataset for training and testing
def create_dataset(filenames, targets, split):
    dataset = tf.data.Dataset.from_tensor_slices((filenames, targets))
    dataset = dataset.map(lambda x, y: load_wav_for_map(x, y)).map(extract_embedding).unbatch()
    dataset = dataset.cache()

    if split == 'train':
        dataset = dataset.shuffle(1000)
    
    dataset = dataset.batch(32).prefetch(tf.data.AUTOTUNE)
    return dataset

train_filenames = filtered_pd[filtered_pd['split'] == 'train']['filename']

train_ds = create_dataset(filtered_pd[filtered_pd['split'] == 'train']['filename'],
                          filtered_pd[filtered_pd['split'] == 'train']['target'], 'train')

test_ds = create_dataset(filtered_pd[filtered_pd['split'] == 'test']['filename'],
                         filtered_pd[filtered_pd['split'] == 'test']['target'], 'test')

# Remove label column from the datasets
remove_label_column = lambda embedding, label: (embedding, label)

train_ds = train_ds.map(remove_label_column)
test_ds = test_ds.map(remove_label_column)


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089










In [12]:

# Function to create and compile the model
from tensorflow.keras import layers, models

def create_model(input_shape, num_classes):
    input_layer = layers.Input(shape=input_shape, dtype=tf.float32, name='input_embedding')
    
    
    x = layers.Dense(512, activation='relu')(input_layer)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.5)(x)  
    x = layers.Dense(256, activation='relu')(x)  
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.4)(x)
    
    
    
    output_layer = layers.Dense(num_classes, activation='softmax')(x)

    model = models.Model(inputs=input_layer, outputs=output_layer, name='model')
    return model


# Function to compile the model with specified optimizer and learning rate
def compile_model(model, learning_rate):
    
    model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

    return model

# Function to train the model
def train_model(model, train_dataset, epochs, steps_per_epoch):
    history = model.fit(train_dataset,
                        epochs=epochs,
                        steps_per_epoch=steps_per_epoch)
    return history


def evaluate_model(model, test_dataset):
    loss, accuracy = model.evaluate(test_dataset)
    return loss, accuracy


model = create_model(input_shape=(1024,), num_classes=len(my_classes))
model = compile_model(model, learning_rate=0.001)


batch_size = 16
total_samples = len(train_filenames)
steps_per_epoch = total_samples // batch_size


history = model.fit(train_ds, epochs=30, steps_per_epoch=steps_per_epoch)


loss, accuracy = evaluate_model(model, test_ds)


print("Loss: ", loss)
print("Accuracy: ", accuracy)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Loss:  2.6563286781311035
Accuracy:  0.36948975920677185


In [30]:
model.save("b_bird_model.h5")