# Load model

In [None]:
from birdclassification.preprocessing.filtering import filter_recordings_30
from torch.utils.data import DataLoader
import torch
from sklearn.model_selection import train_test_split
from birdclassification.training.dataset import Recordings30
from birdclassification.training.cnn_training_torch.CNN_model import CNNNetwork
from birdclassification.training.preprocessing_pipeline import PreprocessingPipeline
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np
from noisereduce.torchgate import TorchGate as TG

In [None]:
# Checkpoint file holding the trained network's state_dict.
PATH = '../../birdclassification/training/saved_models/model_TAK.pt'
cnn = CNNNetwork()
# map_location="cpu" lets a checkpoint that was written from a CUDA session
# load on a machine without a GPU; the model is moved to its target device
# separately with cnn.to(...).
cnn.load_state_dict(torch.load(PATH, map_location="cpu"))

In [None]:
# Put the network in evaluation mode before it is used for inference.
cnn.eval()

# Prepare dataset

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Passed as random_state to both train_test_split calls.
SEED = 123
# Directory handed to Recordings30 as recording_dir.
RECORDINGS_DIR = '/mnt/d/recordings_30/'
# RECORDINGS_DIR =  '/media/jacek/E753-A120/recordings_30/'
# Placeholder path; it is passed to PreprocessingPipeline together with noises_df=None.
NOISES_DIR = '/path/to/noise/dataset'
SAMPLE_RATE = 32000
# Batch size used by the DataLoaders.
BATCH_SIZE = 32
# NOTE(review): LEARNING_RATE and EPOCHS are not referenced by any evaluation
# cell visible in this notebook — presumably copied from the training script.
LEARNING_RATE = 0.0001
EPOCHS = 20
# Number of worker processes used by the DataLoaders.
NUM_WORKERS = 8

# Move the loaded model to the selected device.
cnn.to(DEVICE)

In [None]:
# Recording table produced by filter_recordings_30 from the two CSV files.
df = filter_recordings_30(
    "../../data/xeno_canto_recordings.csv",
    "../../data/bird-list-extended.csv",
)

# Stratified 80/10/10 split on 'Latin name': hold out 20 % first, then halve
# the hold-out into validation and test parts.
train_df, test_val_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df['Latin name'],
    random_state=SEED,
)
val_df, test_df = train_test_split(
    test_val_df,
    test_size=0.5,
    stratify=test_val_df['Latin name'],
    random_state=SEED,
)

# Only the training dataset is created with random_fragment=True.
train_ds = Recordings30(
    train_df,
    recording_dir=RECORDINGS_DIR,
    device=DEVICE,
    random_fragment=True,
)
val_ds, test_ds = (
    Recordings30(split_df, recording_dir=RECORDINGS_DIR, device=DEVICE)
    for split_df in (val_df, test_df)
)

# One loader per split, all sharing the same batch size and worker count.
train_dl, val_dl, test_dl = (
    DataLoader(ds, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)
    for ds in (train_ds, val_ds, test_ds)
)

In [None]:
# Build the preprocessing pipeline without a noise table and with
# random_fragment disabled, then place it on the selected device.
preprocessing_pipeline = PreprocessingPipeline(
    device=DEVICE,
    noises_df=None,
    noises_dir=NOISES_DIR,
    random_fragment=False,
)
preprocessing_pipeline.to(DEVICE)

## Show spectrograms

In [None]:
from birdclassification.preprocessing.spectrogram import generate_mel_spectrogram_seq
from birdclassification.visualization.plots import plot_torch_spectrogram

# Plot the spectrograms of the first few training items, each titled with the
# name its label maps to.
number_of_rec = 5
# Fetch the label -> name mapping once instead of on every iteration.
train_mapping = train_ds.get_mapping()
for i in range(number_of_rec):
    # Named `recording` rather than `input` so the builtin input() stays usable.
    recording, label = train_ds[i]
    spectrogram = preprocessing_pipeline(recording.to(DEVICE))
    plot_torch_spectrogram(spectrogram.squeeze().cpu(), title=train_mapping[label])

## Predict bird demo

In [None]:
# NOTE(review): length_in_seconds and sr are not read by any cell visible in
# this notebook; kept in case other cells rely on them.
length_in_seconds = 3
sr = 32000
# Position of the test item used for the demo.
index = 27
# Label -> name lookup; named `mapping` so the builtin map() is not shadowed.
mapping = test_ds.get_mapping()

cnn.eval()
with torch.no_grad():
    # Named `recording` rather than `input` so the builtin input() stays usable.
    recording, label = test_ds[index]
    spectrogram = preprocessing_pipeline(recording.to(DEVICE))
    # The plot title shows the true label of the item.
    plot_torch_spectrogram(spectrogram.squeeze().cpu(), title=mapping[label])
    validation_output = cnn(spectrogram)
    # Index of the highest score along dim 1, reduced to a scalar tensor.
    predictions = torch.argmax(validation_output, dim=1).squeeze()

print("Prediction: ", mapping[predictions.item()])

# Validation

In [None]:
from birdclassification.training.validation_metrics import calculate_metric, get_true_and_predicted_labels
from sklearn.metrics import f1_score, precision_score
import numpy as np

In [None]:
# Collect labels over the whole test loader. The helper's name suggests the
# order is (true, predicted) — TODO confirm; the cells below pass x as sklearn's
# y_true argument and y as y_pred.
x, y = get_true_and_predicted_labels(cnn, test_dl, preprocessing_pipeline, DEVICE)

### F1 score (macro averaged)

In [None]:
# f1_score takes (y_true, y_pred); average='macro' is the unweighted mean of
# the per-class F1 scores.
f1_s = f1_score(x, y, average='macro')
f1_s

### Precision (macro averaged)

In [None]:
# precision_score takes (y_true, y_pred); average='macro' is the unweighted
# mean of the per-class precision values.
precision = precision_score(x, y, average='macro')
precision

### Classification report

In [None]:
# Display names taken from the dataset mapping, in the mapping's iteration order.
names = list(train_ds.get_mapping().values())

In [None]:
# NOTE(review): target_names is matched positionally against the sorted labels
# found in x / y, so this presumably relies on `names` listing the classes in
# that same order and on every class occurring in the test data — confirm.
classification = classification_report(x, y, target_names=names)

In [None]:
# Print only the report text: any extra positional argument to print() is
# emitted in front of the report and shifts its header row out of alignment.
print(classification)

In [None]:
# sklearn convention: entry [i, j] counts samples whose first-argument label is
# i and whose second-argument label is j, i.e. rows follow x and columns follow y.
c_matrix = confusion_matrix(x, y)

In [None]:
# Normalise every row of the count matrix by its row total.
row_totals = c_matrix.sum(axis=1)[:, np.newaxis]
cm = c_matrix.astype('float') / row_totals

# Wrap the matrix in a DataFrame labelled on both axes with the mapping values.
df_cm = pd.DataFrame(cm)
axis_labels = train_ds.get_mapping().values()
df_cm.columns = axis_labels
df_cm.index = axis_labels

# Large annotated heatmap; x axis = prediction, y axis = true label.
plt.figure(figsize=(40, 40))
s = sns.heatmap(df_cm, annot=True, cmap='binary', fmt='.2f')
s.set_xlabel('Prediction', fontsize=24, labelpad=70)
s.set_ylabel('True label', fontsize=24, labelpad=70)
# s.set_title("Confusion matrix of predictions")

## Most common mistakes

In [None]:
import numpy as np


# Label -> name lookup taken from the training dataset.
# NOTE(review): binding the name `map` shadows the builtin map() for the rest
# of the session.
map = train_ds.get_mapping()
def top_n_argmax_2d(array_2d, N):
    """Return the (row, col) positions of the N largest off-diagonal entries.

    The main diagonal is treated as 0 so that it does not dominate the
    ranking. The ranking is done on a copy, so the caller's array is left
    untouched.

    Args:
        array_2d: 2-D array-like of scores (e.g. a confusion matrix).
        N: number of positions to return. Values <= 0 give an empty list and
            values larger than the number of entries are clamped to it.

    Returns:
        A list of (row, col) tuples ordered from the largest value to the
        smallest.
    """
    if N <= 0:
        # Guard needed because a slice of the form [-0:] selects everything.
        return []

    # Work on a private copy: fill_diagonal writes in place.
    scores = np.array(array_2d)
    np.fill_diagonal(scores, 0)
    flat = scores.ravel()

    # argpartition raises when asked for more elements than exist.
    N = min(N, flat.size)
    if N == 0:
        return []

    # Cheap selection of the N largest entries, then order just those N.
    candidates = np.argpartition(flat, -N)[-N:]
    ranked = sorted(candidates.tolist(), key=flat.__getitem__, reverse=True)
    rows, cols = np.unravel_index(ranked, scores.shape)
    return list(zip(rows, cols))

N = 25
top_indices = top_n_argmax_2d(cm, N)

# Table of the most frequent confusions. cm is derived from sklearn's
# confusion_matrix, so each (row, col) position is (true label, predicted
# label); the row index therefore belongs in the "True Label" column and the
# column index in "Predicted". "Frequency" is the row-normalised value stored
# in cm at that position.
class_names = train_ds.get_mapping()
# Built in one go under its own name so the recordings DataFrame `df` and the
# demo `index` defined in earlier cells are not overwritten.
mistakes_df = pd.DataFrame(
    [
        (class_names[pred_idx], class_names[true_idx], cm[true_idx, pred_idx])
        for true_idx, pred_idx in top_indices
    ],
    columns=["Predicted", "True Label", "Frequency"],
)
mistakes_df.sort_values(by="Frequency", ascending=False)

## Training

In [None]:
import seaborn as sns
# Training loss per logged step, read from the exported CSV.
train_loss_df = pd.read_csv(
    '../../birdclassification/training/saved_models/csv.csv'
)
ax = sns.lineplot(
    data=train_loss_df,
    x="Step",
    y="Value",
)
ax.set(
    xlabel='Step (batch size = 32)',
    ylabel='Loss',
    title='Train loss',
)

In [None]:
# Validation macro-averaged precision per logged step, read from the exported CSV.
maf1 = pd.read_csv(
    '../../birdclassification/training/saved_models/Macro_averaged_precision_score_Validation.csv'
)
ax = sns.lineplot(
    data=maf1,
    x="Step",
    y="Value",
)
ax.set(
    xlabel='Epoch',
    ylabel='Score',
    title="Precision (macro)",
)

In [None]:
# Read the exported training and validation loss curves.
loss_training_df = pd.read_csv(
    '../../birdclassification/training/saved_models/Training vs. Validation Loss_Training.csv'
)
loss_validation_df = pd.read_csv(
    '../../birdclassification/training/saved_models/Training vs. Validation Loss_Validation.csv'
)

# Keep only the plotted columns and tag each curve so the two can share one
# long-format frame, with `split` driving the line colour.
loss_training_df = loss_training_df[['Step', 'Value']].assign(split='train loss')
loss_validation_df = loss_validation_df[['Step', 'Value']].assign(split='validation loss')
combined_df = pd.concat([loss_training_df, loss_validation_df])

ax = sns.lineplot(
    data=combined_df,
    x="Step",
    y="Value",
    hue="split",
    ci="sd",
)
ax.set(
    xlabel='Epoch',
    ylabel='Loss',
    title="Loss",
)
