In [None]:
from opensoundscape.annotations import BoxedAnnotations
import os
import pandas as pd
import numpy as np
import torch
import random

# Seed every random number generator used in this notebook (PyTorch, the
# standard library and NumPy) with the same value so reruns are repeatable.
for seed_rng in (torch.manual_seed, random.seed, np.random.seed):
    seed_rng(0)

# Create training and validation sets and train a ResNet-18 CNN

In [None]:
name_experiment = 'texel_baseline_20250331'
train_set_dir = os.path.join('/home/reindert/Valentin_REVO/surfperch_toshare/eval_texel Outputs/september 2024/surfperch/labeled_outputs/', name_experiment)

# Each class lives in its own sub-folder of `train_set_dir`. The listings are
# sorted because os.listdir() returns entries in arbitrary order, and the
# seeded splits computed later depend on the row order of the saved CSV.
pos_samples_file = sorted(
    os.path.join(train_set_dir, 'downsweep', file)
    for file in os.listdir(os.path.join(train_set_dir, 'downsweep'))
)
neg_samples_file = sorted(
    os.path.join(train_set_dir, 'Unknown', file)
    for file in os.listdir(os.path.join(train_set_dir, 'Unknown'))
)

# One row per clip: file path, clip bounds (0.0 to 5.0) and the label column
# 'downsweep' (1 for files from the 'downsweep' folder, 0 for 'Unknown').
df_pos = pd.DataFrame({'file': pos_samples_file, 'start_time': 0.0, 'end_time': 5.0, 'downsweep': 1})
df_neg = pd.DataFrame({'file': neg_samples_file, 'start_time': 0.0, 'end_time': 5.0, 'downsweep': 0})

# Stack positives and negatives into a single table with a fresh 0..n-1 index.
df_trainset = pd.concat([df_pos, df_neg], ignore_index=True)

# Save the table as CSV in the current working directory.
df_trainset.to_csv('train_set_baseline.csv', index=False)

# Class balance, placed last so the notebook actually displays it.
df_trainset['downsweep'].value_counts()

In [None]:
# TODO - Change to 5-fold validation
# Prepare train and valid set
import sklearn.model_selection

# Index the table by (file, start_time, end_time). The index columns are
# referenced by name rather than by position, so a change in the CSV column
# order raises an error instead of silently indexing by the wrong columns.
df_trainset = pd.read_csv('train_set_baseline.csv', index_col=['file', 'start_time', 'end_time'])

# Add a class column called background which is not(downsweep)
# df_trainset['background'] = 1 - df_trainset['downsweep']



In [None]:
# Hold out 20 % of the clips for validation. Stratifying on the 'downsweep'
# label keeps the class proportions the same in both parts; the fixed
# random_state makes the split repeatable.
train_df, valid_df = sklearn.model_selection.train_test_split(
    df_trainset,
    stratify=df_trainset['downsweep'],
    random_state=0,
    test_size=0.2,
)

train_df

In [None]:
from sklearn.model_selection import StratifiedKFold

# Create the 5-fold validation object
# Stratified 5-fold splitter; shuffling with a fixed seed keeps folds repeatable.
seed = 0
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
fish_sound = 'downsweep'

# Iterate over the folds of the training split and print, for verification and
# debugging, the positional indices and the label counts of both parts.
for i, (train_index, test_index) in enumerate(kf.split(train_df, train_df[fish_sound])):
    print(f"Fold {i}:")
    print(f"Train index: {train_index}")
    print(f"Test index: {test_index}")
    # The original printed the held-out fold's counts under the "train set"
    # heading; each heading now matches the rows it describes.
    print("size of each label in train set")
    print(train_df.iloc[train_index][fish_sound].value_counts())
    print("size of each label in test set")
    print(train_df.iloc[test_index][fish_sound].value_counts())
    print("\n")



In [None]:
from opensoundscape import CNN
# ResNet is used because it is the most common architecture in bioacoustics
# (ref: Stowell 2022). The 18-layer variant is chosen because the dataset is
# small (to avoid overfitting).

class_list = ['downsweep']
architecture = 'resnet18'

# Single-class classifier operating on 5.0-second samples.
model = CNN(
    classes=class_list,
    architecture=architecture,
    overlay_df=None,
    sample_duration=5.0,
)

print(f"model.device is {model.device}")

### Subprocess / preprocess

In [None]:
# from opensoundscape.preprocess.actions import Action
# from opensoundscape.preprocess.action_functions import pcen

# # TODO - Not working: returns blank spectrograms, presumably because pcen is called from librosa, which requires the data to be on the correct scale

# model.preprocessor.insert_action(
#     action_index="PCEN",  # give it a name
#     action=Action(pcen),  # the action object
#     after_key="bandpass",  # where to put it (can also use before_key=...)
# )

## Check spectrograms visuals
Show samples from the training set, so that the data augmentations are visible as well (they are deactivated for the validation and test sets).

In [None]:
from opensoundscape.data_selection import resample
# Resample the training split with a fixed random_state, requesting 200 samples
# per class and 200 samples without labels.
augmented_train_df = resample(
    train_df,
    random_state=0,
    n_samples_per_class=200,
    n_samples_without_labels=200,
)
# Only the last expression of a cell is displayed, so the shape below is
# evaluated but not shown.
train_df.shape
augmented_train_df.value_counts()

In [None]:
from librosa import pcen
# Display the model's current preprocessing pipeline as the cell output.
model.preprocessor.pipeline

In [None]:
from opensoundscape.preprocess.utils import show_tensor_grid
from opensoundscape import AudioFileDataset
from  opensoundscape.preprocess.preprocessors import PCENPreprocessor
from opensoundscape import CNN, SpectrogramPreprocessor
from opensoundscape.preprocess.actions import Action
from opensoundscape.preprocess.action_functions import pcen

sample_rate = 32000

# Preprocessing: 5.0-second samples, loaded at `sample_rate`, band-passed to
# 50-2000 and converted to a dB-scaled spectrogram.
preprocessor = SpectrogramPreprocessor(5.0, overlay_df=train_df)
preprocessor.pipeline.load_audio.set(sample_rate=sample_rate)
preprocessor.pipeline.bandpass.set(min_f=50, max_f=2000)
preprocessor.pipeline.to_spec.set(window_samples = 4 * (sample_rate // 100),  # 1280 samples at 32000
                                        # overlap_samples = None,
                                        # fft_size = None,
                                        dB_scale = True,
                                        # scaling = 'spectrum'
                                        )

# Augmentation: every augmentation action is bypassed. The mask parameters are
# still set so they are in place if the masks are re-enabled.
preprocessor.pipeline.random_trim_audio.bypass = True
preprocessor.pipeline.time_mask.bypass = True
preprocessor.pipeline.time_mask.set(max_masks=2, max_width=0.1)
preprocessor.pipeline.frequency_mask.bypass = True
preprocessor.pipeline.frequency_mask.set(max_masks=2, max_width=0.1)
preprocessor.pipeline.add_noise.bypass = True
preprocessor.pipeline.random_affine.bypass = True

# model.preprocessor.pipeline.overlay.bypass = True
# preprocessor.pipeline.overlay.set(overlay_class='different', overlay_weight=0.5)
# preprocessor.insert_action(
#     action_index="PCEN",  # give it a name
#     action=Action(pcen),  # the action object
#     after_key="bandpass",  # where to put it (can also use before_key=...)
# )
# preprocessor.insert_action(
#     action_index="to_log",  # give it a name
#     action=Action(amplitude_to_db),  # the action object
#     after_key="PCEN",  # where to put it (can also use before_key=...)
# )


# Replace the model's default preprocessor with the one configured above.
model.preprocessor = preprocessor

dataset = AudioFileDataset(train_df, model.preprocessor)
# NOTE(review): if AudioFileDataset keeps a reference to (not a copy of) the
# preprocessor, this also bypasses overlay for training - confirm.
dataset.preprocessor.pipeline.overlay.bypass = True
# dataset.preprocessor.pipeline.overlay.set(overlay_class=train_df.sample(50))

# Preview nine consecutive training samples starting at `sample_idx`. Each
# sample is fetched exactly once: the original indexed `dataset[i]` three times
# per sample, repeating the preprocessing on every access.
sample_idx = 15
preview_samples = [dataset[i] for i in range(sample_idx, sample_idx + 9)]
tensors = [sample.data for sample in preview_samples]
# For each sample, the names of the classes whose label value is > 0.
sample_labels = [list(sample.labels[sample.labels > 0].index) for sample in preview_samples]

_ = show_tensor_grid(tensors, 3, labels=sample_labels)

## Training

In [None]:
from pathlib import Path
# Folder that receives the checkpoints written during training; it is created
# if it does not exist yet.
checkpoint_folder = Path("model_training_checkpoints")
checkpoint_folder.mkdir(exist_ok=True)

# Training options, collected in one place and unpacked into the call below.
train_options = dict(
    epochs=5,
    batch_size=8,
    num_workers=16,
    save_interval=5,  # checkpoint interval; 5 here, the same as `epochs`
    save_path=checkpoint_folder,  # location to save checkpoints
    progress_bar=False,
)

# Train on the resampled training split and validate on the held-out split.
model.train(augmented_train_df, valid_df, **train_options)

## Test set

In [None]:
test_files_path = '/home/reindert/Valentin_REVO/surfperch_toshare/eval_texel Data/september 2024/test_set/'
test_files_list = os.listdir(test_files_path)

# Full paths of the audio (.wav) and annotation (.txt) files, each in sorted
# name order. The two lists are matched by position below, so every recording
# is expected to sort into the same slot as its annotation file.
wav_files = [os.path.join(test_files_path, name) for name in sorted(test_files_list) if name.endswith('.wav')]
annot_files = [os.path.join(test_files_path, name) for name in sorted(test_files_list) if name.endswith('.txt')]

print("Checking files order\n", wav_files[:3])
print(annot_files[:3])

selection_files, audio_files = annot_files, wav_files

# Load the Raven selection tables, taking the annotation label from the
# 'Type' column.
annotations = BoxedAnnotations.from_raven_files(
    raven_files=selection_files,
    audio_files=audio_files,
    annotation_column='Type',
)

# Clip settings: 5.0-long clips with no overlap, labelled using a minimum label
# overlap of 0.2 and restricted to the class named by `fish_sound`.
clip_duration = 5.0
clip_overlap = 0
min_label_overlap = 0.2
species_of_interest = [fish_sound]

clip_labels = annotations.clip_labels(
    class_subset=species_of_interest,
    min_label_overlap=min_label_overlap,
    clip_overlap=clip_overlap,
    clip_duration=clip_duration,
)

clip_labels.value_counts()

In [None]:
# Display the loaded annotations object as the cell output.
annotations

In [None]:
import os
import pandas as pd
# Test set from pickle file
# Similar path to the one used in create_test_set_data.ipynb


In [None]:
# Display the per-clip label table as the cell output.
clip_labels

In [None]:
# Load the test set from a pickle file instead of building a new one, then
# reshape it into the (file, start_time, end_time)-indexed layout used by
# `clip_labels`.
test_set_path = '/home/reindert/Valentin_REVO/surfperch_toshare/eval_texel Outputs/september 2024/surfperch/test_set/'
test_files_path = '/home/reindert/Valentin_REVO/surfperch_toshare/eval_texel Data/september 2024/test_set/'
# NOTE(review): this rebinds `fish_sound`, which is set to 'downsweep' earlier
# in the notebook; cells re-run after this one will see 'A'.
fish_sound = 'A'

# NOTE(review): unpickling can execute arbitrary code - only load trusted files.
test_set_df = (
    pd.read_pickle(os.path.join(test_set_path, 'test_set.pkl'))
    # Drop the columns that are not needed for evaluation.
    .drop(columns=['Label', 'Embedding'])
    # Rename to the column names used elsewhere in this notebook.
    .rename(columns={'label_int': fish_sound, 'Starttime': 'start_time', 'Endtime': 'end_time', 'filename': 'file'})
    .assign(**{
        # Label column as booleans.
        fish_sound: lambda frame: frame[fish_sound].astype(bool),
        # Prefix each file name with the test-set folder.
        'file': lambda frame: test_files_path + frame['file'],
    })
    .set_index(['file', 'start_time', 'end_time'])
)

In [None]:
# Display the reformatted pickle-based test set as the cell output.
test_set_df

In [None]:
# Sigmoid-activated scores for the test clips and for the validation split.
# The `wandb_session` argument was removed: no such variable is created
# anywhere in this notebook, so passing it raised a NameError on a fresh run.
# Re-add `wandb_session=<session>` to both calls once a session object exists.
pred_testset = model.predict(clip_labels, batch_size=64, num_workers=16, activation_layer='sigmoid')
predict_validset = model.predict(valid_df, batch_size=8, num_workers=16, activation_layer='sigmoid')

## Metrics calculation

In [None]:
from sklearn.metrics import roc_auc_score, f1_score, precision_score, recall_score, precision_recall_curve, auc

def binary_metrics(labels, scores, threshold=0.5):
    """Compute classification metrics for a single binary class.

    Parameters
    ----------
    labels : array-like of 0/1 (or bool)
        Ground-truth labels.
    scores : array-like of float
        Predicted scores, one per label.
    threshold : float, default 0.5
        Scores strictly above this value count as positive predictions. For
        scores in [0, 1] and the default threshold this matches the
        `.round()` used previously (which rounds exactly 0.5 down to 0).

    Returns
    -------
    dict
        Keys 'precision', 'recall', 'f1' (computed on the thresholded
        predictions) and 'auc_roc', 'auc_precision_recall' (computed on the
        raw scores).
    """
    labels = np.asarray(labels).astype(int)
    scores = np.asarray(scores, dtype=float)
    preds = (scores > threshold).astype(int)
    pr_precision, pr_recall, _ = precision_recall_curve(labels, scores)
    return {
        'precision': precision_score(labels, preds, pos_label=1, average='binary'),
        'recall': recall_score(labels, preds, pos_label=1, average='binary'),
        'f1': f1_score(labels, preds, pos_label=1, average='binary'),
        'auc_roc': roc_auc_score(labels, scores),
        'auc_precision_recall': auc(pr_recall, pr_precision),
    }


# The label and prediction tables are keyed by the model's class name. The
# previously hard-coded 'A' is not a column of `valid_df` or `clip_labels`
# (both are built with 'downsweep'), so it raised a KeyError.
label_col = class_list[0]

valid_metrics = binary_metrics(valid_df[label_col].values, predict_validset[label_col].values)
test_metrics = binary_metrics(clip_labels[label_col].values, pred_testset[label_col].values)

# Keep the individual result names that this cell has always defined.
precision_valid = valid_metrics['precision']
recall_valid = valid_metrics['recall']
f1_valid = valid_metrics['f1']
auc_roc_valid = valid_metrics['auc_roc']
auc_precision_recall_valid = valid_metrics['auc_precision_recall']

precision_test = test_metrics['precision']
recall_test = test_metrics['recall']
f1_test = test_metrics['f1']
auc_roc_test = test_metrics['auc_roc']
auc_precision_recall_test = test_metrics['auc_precision_recall']

# Print all metrics, validation first, then test.
for heading, split, split_metrics in (
    ("Validation Set", "valid", valid_metrics),
    ("\n\nTest Set", "test", test_metrics),
):
    print(heading)
    print(f"Precision {split}: ", split_metrics['precision'])
    print(f"Recall {split}: ", split_metrics['recall'])
    print(f"F1 {split}: ", split_metrics['f1'])
    print(f"AUC ROC {split}: ", split_metrics['auc_roc'])
    print("AUC precision recall: ", split_metrics['auc_precision_recall'])

In [None]:
# NOTE(review): `metrics` is only assigned in the cell that follows this one,
# so on a fresh top-to-bottom run this cell raises NameError - the cell order
# needs fixing (or this scratch cell should be removed).
metrics.items()
# Start an accumulator with one zeroed entry per metric name.
avg_valid = {}
for key, value in zip(metrics.keys(), metrics.values()):
    avg_valid[key] = 0

In [None]:
# Placeholder metrics dictionary for testing: each metric name maps to a list
# of values.

metrics = {'Precision_valid': [0.5,0.2], 'Recall_valid': [0.5,0.2], 'F1_valid': [0.5,0.2]}

# Mean of the listed values, per metric name.
avg_valid = {
    metric_name: sum(metric_values) / float(len(metric_values))
    for metric_name, metric_values in metrics.items()
}