# Load model

In [1]:
from birdclassification.preprocessing.filtering import filter_recordings_30
from torch.utils.data import DataLoader
import torch
from sklearn.model_selection import train_test_split
from birdclassification.training.dataset import Recordings30
from birdclassification.training.cnn_training_torch.CNN_model import CNNNetwork
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import balanced_accuracy_score, accuracy_score, confusion_matrix, classification_report

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
PATH = '../saved_models/cnn_1.pt'
cnn = CNNNetwork()
cnn.load_state_dict(torch.load(PATH))

<All keys matched successfully>

In [3]:
cnn.eval()

CNNNetwork(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv3): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv4): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear): Linear(in_features=10880, out_features=30, bias=True)
)

# Prepare dataset

In [4]:
# DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
SEED = 123
RECORDINGS_DIR = '/mnt/d/recordings_30/' #'/media/jacek/E753-A120/recordings_30/'
NOISES_DIR = '/path/to/noise/dataset'
SAMPLE_RATE = 32000
NUM_SAMPLES = SAMPLE_RATE * 1
BATCH_SIZE = 32
LEARNING_RATE = 0.0001
EPOCHS = 5
NUM_WORKERS = 8

In [5]:
df = filter_recordings_30("../../../data/xeno_canto_recordings.csv", "../../../data/bird-list-extended.csv", )

train_df, test_val_df = train_test_split(df, stratify=df['Latin name'], test_size=0.2, random_state = SEED)
val_df, test_df = train_test_split(test_val_df, stratify=test_val_df['Latin name'], test_size=0.5, random_state = SEED)

train_ds = Recordings30(train_df, recording_dir=RECORDINGS_DIR, noises_dir=NOISES_DIR, sample_rate=SAMPLE_RATE, device = DEVICE)
val_ds = Recordings30(val_df, recording_dir=RECORDINGS_DIR, noises_dir=NOISES_DIR, sample_rate = 32000, device = DEVICE)
test_ds = Recordings30(test_df, recording_dir=RECORDINGS_DIR, noises_dir=NOISES_DIR,sample_rate = 32000,device = DEVICE)

train_dl  = DataLoader(train_ds, batch_size= BATCH_SIZE, num_workers=NUM_WORKERS)
val_dl  = DataLoader(val_ds, batch_size= BATCH_SIZE, num_workers=NUM_WORKERS)
test_dl  = DataLoader(test_ds, batch_size= BATCH_SIZE, num_workers=NUM_WORKERS)

  recordings = pd.read_csv(filepath_recordings)


# Validation

In [6]:
from birdclassification.training.validation_metrics import calculate_metric
from sklearn.metrics import f1_score, precision_score
import numpy as np

In [7]:
train_ds.get_mapping()

{0: 'Alauda arvensis',
 1: 'Anas platyrhynchos',
 2: 'Apus apus',
 3: 'Asio otus',
 4: 'Buteo buteo',
 5: 'Carduelis carduelis',
 6: 'Ciconia ciconia',
 7: 'Columba livia',
 8: 'Corvus corax',
 9: 'Corvus cornix',
 10: 'Corvus frugilegus',
 11: 'Cuculus canorus',
 12: 'Cygnus olor',
 13: 'Dendrocopos major',
 14: 'Garrulus glandarius',
 15: 'Grus grus',
 16: 'Hirundo rustica',
 17: 'Lophophanes cristatus',
 18: 'Passer domesticus',
 19: 'Phalacrocorax carbo',
 20: 'Phasianus colchicus',
 21: 'Phoenicurus ochruros',
 22: 'Pica pica',
 23: 'Picus viridis',
 24: 'Sternula albifrons',
 25: 'Sturnus vulgaris',
 26: 'Tetrao urogallus',
 27: 'Turdus merula',
 28: 'Turdus torquatus',
 29: 'Turdus viscivorus'}

In [8]:
f1_score = calculate_metric(cnn, val_dl, device=DEVICE, metric=lambda x, y: f1_score(x, y, average='macro'))
f1_score

0.5051342377832662

In [9]:
precision = calculate_metric(cnn, val_dl, device=DEVICE, metric=lambda x, y: precision_score(x, y, average='macro'))
precision

0.5804280546740362

In [10]:
classification = calculate_metric(cnn, val_dl, device=DEVICE, metric=classification_report)
classification

'              precision    recall  f1-score   support\n\n         0.0       0.71      0.87      0.78       265\n         1.0       0.74      0.63      0.68       108\n         2.0       0.77      0.75      0.76        71\n         3.0       0.58      0.24      0.34       127\n         4.0       0.60      0.67      0.63        94\n         5.0       0.10      0.05      0.07       206\n         6.0       0.70      0.50      0.58        14\n         7.0       0.67      0.45      0.54        22\n         8.0       0.48      0.88      0.62       207\n         9.0       0.49      0.36      0.42       121\n        10.0       0.48      0.69      0.57        70\n        11.0       0.35      0.86      0.49       195\n        12.0       0.19      0.27      0.22        52\n        13.0       0.85      0.62      0.72       246\n        14.0       0.38      0.44      0.41       188\n        15.0       0.41      0.07      0.12        95\n        16.0       0.75      0.57      0.65       166\n       

In [12]:
confusion_matrix = calculate_metric(cnn, val_dl, device=DEVICE, metric=confusion_matrix) # BUGGGGG

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/home/piotr/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3526, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_1345/875642692.py", line 1, in <module>
    confusion_matrix = calculate_metric(cnn, val_dl, device=DEVICE, metric=confusion_matrix)
  File "/home/piotr/thesis/Bird-classification-model/birdclassification/training/validation_metrics.py", line 187, in calculate_metric
  File "/home/piotr/.local/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 630, in __next__
    data = self._next_data()
  File "/home/piotr/.local/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1328, in _next_data
    idx, data = self._get_data()
  File "/home/piotr/.local/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1294, in _get_data
    success, data = self._try_get_data()
  File "/home/piotr/.local/lib/python3.10/site-packages/torch/uti

In [None]:
cm = confusion_matrix
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
df_cm = pd.DataFrame(cm)
df_cm.columns = train_ds.get_mapping().values()
df_cm.index = train_ds.get_mapping().values()
plt.figure(figsize = (40,40))
s = sns.heatmap(df_cm, annot=True, cmap = 'binary', fmt='.2f')
s.set_xlabel('True label', fontsize=24, labelpad=70)
s.set_ylabel('Prediction', fontsize=24, labelpad=70)
# s.set_title("Confusion matrix of predictions")