In [18]:
# !pip install -q "tensorflow==2.11.*"
# !pip install -q "tensorflow_io==0.28.*"

In [19]:
# !kaggle datasets download -d warcoder/infant-cry-audio-corpus
# !unzip infant-cry-audio-corpus.zip -d infant-cry

In [1]:
import os

from IPython import display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_io as tfio
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import EarlyStopping

In [21]:
# Fetch the YAMNet model from TensorFlow Hub; it is used further down to turn
# waveforms into embeddings.
yamnet_model_handle = "https://tfhub.dev/google/yamnet/1"
yamnet_model = hub.load(handle=yamnet_model_handle)

In [2]:
# Class folders expected directly under the dataset root; each folder name
# doubles as the label of the recordings inside it.
subfolders = [
    'belly_pain',
    'burping',
    'discomfort',
    'hungry',
    'tired',
]
root_dir = 'data/donateacry_corpus'

In [4]:
file_paths = []
labels = []

def collect_files_and_labels(base_dir, subfolders, file_paths, labels):
    """Append every WAV file found under the class folders to the given lists.

    Args:
        base_dir: Directory that contains one sub-directory per class.
        subfolders: Names of the class sub-directories to scan; each name is
            also used as the label for the files found inside it.
        file_paths: List extended in place with the path of each WAV file.
        labels: List extended in place with the matching folder name, so
            ``file_paths[i]`` and ``labels[i]`` always describe the same file.

    Sub-directories that do not exist are skipped silently. Nothing is
    returned; the results are delivered through the two list arguments.
    """
    for folder_name in subfolders:
        folder_path = os.path.join(base_dir, folder_name)
        if not os.path.isdir(folder_path):
            continue
        # os.listdir yields entries in arbitrary order; sorting makes the
        # collected order (and everything derived from it) reproducible.
        for file_name in sorted(os.listdir(folder_path)):
            # Case-insensitive match so recordings saved as ".WAV" are kept.
            if not file_name.lower().endswith('.wav'):
                continue
            file_paths.append(os.path.join(folder_path, file_name))
            labels.append(folder_name)

# Fill the two module-level lists with the WAV files found under each class folder.
collect_files_and_labels(
    base_dir=root_dir,
    subfolders=subfolders,
    file_paths=file_paths,
    labels=labels,
)

In [5]:
# One row per recording: its path and the class folder it was found in.
df = pd.DataFrame(data={'File Path': file_paths, 'Label': labels})

In [6]:
# Show the assembled table of file paths and their (still string-valued) labels.
df

Unnamed: 0,File Path,Label
0,data/donateacry_corpus\belly_pain\549a46d8-9c8...,belly_pain
1,data/donateacry_corpus\belly_pain\643D64AD-B71...,belly_pain
2,data/donateacry_corpus\belly_pain\643D64AD-B71...,belly_pain
3,data/donateacry_corpus\belly_pain\643D64AD-B71...,belly_pain
4,data/donateacry_corpus\belly_pain\643D64AD-B71...,belly_pain
...,...,...
452,data/donateacry_corpus\tired\d5abedab-9ed5-48d...,tired
453,data/donateacry_corpus\tired\d6cda191-4962-430...,tired
454,data/donateacry_corpus\tired\d6cda191-4962-430...,tired
455,data/donateacry_corpus\tired\d6cda191-4962-430...,tired


In [None]:
# Write the path/label table to disk, leaving out the index column.
df.to_csv(path_or_buf='infant_cry.csv', index=False)

In [7]:
from sklearn.preprocessing import LabelEncoder

# Encoder that turns the label strings into integer class ids.
label_encoder = LabelEncoder()

# Fitted on the module-level `labels` list rather than on the column itself;
# that list is what the 'Label' column was built from, so row order matches.
# The string column is overwritten with the integer ids.
df['Label'] = label_encoder.fit_transform(labels)

In [8]:
# Separate the inputs (file paths) from the targets (encoded labels).
X, y = df['File Path'], df['Label']

In [9]:
# Inspect the file-path column held in X.
X

0      data/donateacry_corpus\belly_pain\549a46d8-9c8...
1      data/donateacry_corpus\belly_pain\643D64AD-B71...
2      data/donateacry_corpus\belly_pain\643D64AD-B71...
3      data/donateacry_corpus\belly_pain\643D64AD-B71...
4      data/donateacry_corpus\belly_pain\643D64AD-B71...
                             ...                        
452    data/donateacry_corpus\tired\d5abedab-9ed5-48d...
453    data/donateacry_corpus\tired\d6cda191-4962-430...
454    data/donateacry_corpus\tired\d6cda191-4962-430...
455    data/donateacry_corpus\tired\d6cda191-4962-430...
456    data/donateacry_corpus\tired\eea17285-4ab9-4d3...
Name: File Path, Length: 457, dtype: object

In [28]:
def train_validate_test_split(df, train_frac=0.9, validate_frac=0.05, random_state=None):
    """Shuffle ``df`` and cut it into train / validation / test partitions.

    Args:
        df: DataFrame to split; it is not modified.
        train_frac: Fraction of rows assigned to the training partition.
        validate_frac: Fraction of rows assigned to the validation partition.
            Whatever remains after both cuts becomes the test partition.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            shuffle, and therefore the split, can be reproduced. ``None``
            gives a different shuffle on every call.

    Returns:
        Tuple ``(train, validate, test)`` of DataFrames taken from the
        shuffled, re-indexed copy of ``df``.

    Raises:
        ValueError: If a fraction is negative or the two fractions sum to
            more than 1.
    """
    # Small tolerance so float sums such as 0.9 + 0.1 are not rejected.
    if train_frac < 0 or validate_frac < 0 or train_frac + validate_frac > 1 + 1e-9:
        raise ValueError(
            "train_frac and validate_frac must be non-negative and sum to at most 1"
        )

    shuffled = df.sample(frac=1, random_state=random_state).reset_index(drop=True)
    n_rows = len(shuffled.index)

    # Partition sizes are truncated, so rounding leftovers fall into the test set.
    train_end = int(train_frac * n_rows)
    validate_end = int(validate_frac * n_rows) + train_end

    train = shuffled.iloc[:train_end]
    validate = shuffled.iloc[train_end:validate_end]
    test = shuffled.iloc[validate_end:]
    return train, validate, test

In [29]:
# Split the labelled files and report how many rows landed in each partition.
df_train, df_validation, df_test = train_validate_test_split(df)
print(*map(len, (df_train, df_validation, df_test)))

411 22 24


In [30]:
@tf.function
def load_wav_16k_mono(filename):
    """Read a WAV file and return it as single-channel audio resampled to 16 kHz."""
    raw_bytes = tf.io.read_file(filename)
    # desired_channels=1 asks the decoder for exactly one channel.
    audio, source_rate = tf.audio.decode_wav(raw_bytes, desired_channels=1)
    # Drop the trailing channel axis so only the sample axis remains.
    mono = tf.squeeze(audio, axis=-1)
    return tfio.audio.resample(
        mono,
        rate_in=tf.cast(source_rate, dtype=tf.int64),
        rate_out=16000,
    )

In [31]:
def wav_map(file_path, label):
    """Replace a file path with its loaded 16 kHz mono waveform; the label passes through."""
    waveform = load_wav_16k_mono(file_path)
    return waveform, label

def extract_yamnet_embeddings(wav_data, targets):
    """Run YAMNet on one waveform and pair every embedding row with the label.

    Returns the embeddings together with ``targets`` repeated once per
    embedding row (the size of the embeddings' first dimension).
    """
    # Only the embeddings are needed; the other two model outputs are discarded.
    _, embeddings, _ = yamnet_model(wav_data)
    row_count = tf.shape(embeddings)[0]
    repeated_targets = tf.repeat(targets, row_count)
    return embeddings, repeated_targets

In [32]:
# Each split goes through the same stages: (path, label) pairs -> waveform ->
# YAMNet embeddings -> one example per embedding row -> cached batches of 32.
# Only the training split is shuffled.
train_ds = (
    tf.data.Dataset.from_tensor_slices((df_train['File Path'], df_train['Label']))
    .map(wav_map)
    .map(extract_yamnet_embeddings)
    .unbatch()
    .cache()
    .shuffle(1000)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)

valid_ds = (
    tf.data.Dataset.from_tensor_slices((df_validation['File Path'], df_validation['Label']))
    .map(wav_map)
    .map(extract_yamnet_embeddings)
    .unbatch()
    .cache()
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)

test_ds = (
    tf.data.Dataset.from_tensor_slices((df_test['File Path'], df_test['Label']))
    .map(wav_map)
    .map(extract_yamnet_embeddings)
    .unbatch()
    .cache()
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)

train_ds.element_spec





(TensorSpec(shape=(None, 1024), dtype=tf.float32, name=None),
 TensorSpec(shape=(None,), dtype=tf.int64, name=None))

In [33]:
# tf.keras.backend.clear_session()

# model = tf.keras.Sequential([
#     tf.keras.layers.Input(shape=(1024), dtype=tf.float32, name='input_1'),
#     tf.keras.layers.Dense(512, activation='relu', name='dense_1'),
#     tf.keras.layers.Dense(6, activation='softmax', name='output_1')
# ], name='model_criby')

tf.keras.backend.clear_session()

# One output unit per class the label encoder actually saw. The labels come
# from the class folder names, so a hard-coded count can drift out of sync
# with the data and leave output units that no label ever targets.
num_classes = len(label_encoder.classes_)

# Small classification head on top of the 1024-wide YAMNet embeddings:
# two L2-regularised ReLU layers, each followed by dropout, then a softmax.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(1024,)),
    tf.keras.layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])

In [34]:
# Labels are integer class ids, hence the sparse variant of cross-entropy.
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Patience must be smaller than the number of training epochs, otherwise the
# callback can never fire. Training runs for 10 epochs, so stop after 3
# consecutive epochs without a val_loss improvement and roll back to the best
# weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

In [35]:
# Train the classifier, validating after every epoch.
history = model.fit(
    x=train_ds,
    validation_data=valid_ds,
    epochs=10,
    callbacks=[early_stopping],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Training curves: accuracy on the left, loss on the right.
fig, axes = plt.subplots(1, 2, figsize=(12, 3))
ax = axes.ravel()

for i, metric in enumerate(['accuracy', 'loss']):
    ax[i].plot(history.history[metric])
    ax[i].plot(history.history['val_' + metric])
    ax[i].set_title(f'Training vs. validation {metric}')
    ax[i].set_xlabel('Epoch')
    ax[i].set_ylabel(metric)
    if metric == 'accuracy':
        # Accuracy is bounded, so a fixed [0, 1] axis keeps runs comparable.
        ax[i].set_ylim([0, 1])
    else:
        # Loss has no upper bound; a fixed [0, 1] range would clip values
        # above 1, so only the lower edge is pinned.
        ax[i].set_ylim(bottom=0)
    ax[i].legend(['train', 'valid'])

plt.show()

In [36]:
# Score the held-out split. Each example in test_ds is a single embedding row,
# so the reported accuracy is per embedding row rather than per recording.
results = model.evaluate(x=test_ds, verbose=0)
print(f'Test Loss: {results[0]}', f'Test Accuracy: {results[1]}', sep='\n')

Test Loss: 0.4301045835018158
Test Accuracy: 0.9074627161026001
