# Imports

In [None]:
import torch
import torch.nn as nn               
import torch.nn.functional as F       
import torch.optim as optim            
from torch.utils.data import Dataset, DataLoader 

#
import torchaudio
import torchaudio.transforms as T   
import numpy as np                     
import pandas as pd                   

import matplotlib.pyplot as plt

import os
from collections import OrderedDict, defaultdict

# BASE MODEL ARCHITECTURE

In [36]:
import torch
import torch.nn as nn

class CNNClassifier(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        
        self.conv = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

        # Dynamically determine the correct input size for the Linear layer
        self.flattened_size = self._get_flattened_size()
        
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(self.flattened_size, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes)
        )

    def _get_flattened_size(self):
        with torch.no_grad():
            dummy = torch.zeros(1, 1, 128, 801)  # Same shape as input chunks
            out = self.conv(dummy)
            return out.view(1, -1).shape[1]

    def forward(self, x):
        x = self.conv(x)
        x = self.fc(x)
        return x

# HYPERPARAMETERS

In [52]:
EPOCHS = 25
INIT_LR = 0.005
BATCH_SIZE = 64
LOSS = torch.nn.BCEWithLogitsLoss()

model = CNNClassifier(num_classes=206)
optimizer = optim.SGD(model.parameters(), lr=INIT_LR)

# Dataset Creation

In [39]:
class SpectogramDataset(Dataset):
    def __init__(self, audio_dir:str, label_to_idx:dict, max_cache_size: int = 5):
        self.label_to_idx = label_to_idx
        self.audio_dir = audio_dir
        self.chunk_index_pairs = []
        self.cache = OrderedDict()
        self.path_to_label = defaultdict(list)
        self.max_cache_size = max_cache_size

        for label in os.listdir(self.audio_dir):
            for file in os.listdir(f'{self.audio_dir}/{label}'):
                tensor = torch.load(f"{self.audio_dir}/{label}/{file}")
                amount_of_chunks = tensor.shape[0]
                self.path_to_label[label].append(f'{self.audio_dir}/{label}/{file}')
                for n in range(amount_of_chunks):
                    self.chunk_index_pairs.append((f'./data/processed_train_audio/{label}/{file}' ,label, n))

    def load_cached_tensor(self, file_path):
        if file_path in self.cache:
            self.cache.move_to_end(file_path)
        else:
            tensor = torch.load(file_path)
            self.cache[file_path] = tensor
            if len(self.cache) > self.max_cache_size:
                self.cache.popitem(last=False)
            return self.cache[file_path]


    def __len__(self):
        return len(self.chunk_index_pairs)
    
    def __getitem__(self, idx:int):
        file_path, label, chunk_index = self.chunk_index_pairs[idx]
        tensor = self.load_cached_tensor(file_path)
        chunk = tensor[chunk_index]

        return chunk, self.label_to_idx[label]

In [40]:
import pandas as pd
metadata = pd.read_csv("./data/processed_data.csv")
unique_labels = sorted(metadata["primary_label"].astype(str).unique())
label_to_index = {label: idx for idx,label in enumerate(unique_labels)}
index_to_label = {idx: label for idx,label in enumerate(unique_labels)}
print(list(unique_labels)[:10])
#kann auch numerisch sorten theoretisch

['0', '1', '10', '100', '101', '102', '103', '104', '105', '106']


## Create Data-Loader

In [30]:
%%time

audio_dir="./data/processed_train_audio"


train_dataset = SpectogramDataset(
    audio_dir=audio_dir,
    label_to_idx=label_to_index,
)


training_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

len(train_dataset)

CPU times: user 2.1 s, sys: 21 s, total: 23.1 s
Wall time: 1min 1s


361766

# Backup Cells

In [None]:
import os
import torch
from torch.utils.data import Dataset

class ChunkedSpectrogramDataset(Dataset):
    def __init__(self, tensor_dir: str, label_to_idx: dict):
        self.tensor_dir = tensor_dir
        self.label_to_idx = label_to_idx
        self.index_list = []

        # 🧠 Indexstruktur aufbauen: [(label_name, chunk_idx), ...]
        for fname in os.listdir(tensor_dir):
            if fname.endswith(".pt"):
                label = fname.replace(".pt", "")
                tensor_path = os.path.join(tensor_dir, fname)
                try:
                    tensor = torch.load(tensor_path, map_location="cpu")
                    n_chunks = tensor.shape[0]
                    for i in range(n_chunks):
                        self.index_list.append((label, i))
                except Exception as e:
                    print(f"Fehler beim Laden von {tensor_path}: {e}")

    def __len__(self):
        return len(self.index_list)

    def __getitem__(self, idx):
        label_name, chunk_idx = self.index_list[idx]
        tensor_path = os.path.join(self.tensor_dir, f"{label_name}.pt")

        try:
            # 🔁 Nur dieses eine Label laden
            all_chunks = torch.load(tensor_path, map_location="cpu")
            chunk = all_chunks[chunk_idx]
        except Exception as e:
            print(f"Fehler beim Laden von Chunk {chunk_idx} für Label {label_name}: {e}")
            chunk = torch.zeros((1, 128, 216))  # Dummy shape anpassen falls nötig
            
        # 🎯 Label-Tensor (One-hot)
        label_tensor = torch.zeros(len(self.label_to_idx), dtype=torch.float32)
        if label_name in self.label_to_idx:
            label_index = self.label_to_idx[label_name]
            label_tensor[label_index] = 1.0

        return chunk, label_tensor

# Überarbeitetes  ChunkDataset von ino (path issue)

In [41]:
import os
import torch
from torch.utils.data import Dataset

class inoChunkedSpectrogramDataset(Dataset):
    def __init__(self, file_list:list, label_to_idx: dict):
        self.file_list = file_list
        self.label_to_idx = label_to_idx
        self.index_list = []

        # 🧠 Indexstruktur aufbauen: [(label_name, chunk_idx), ...]
        for tensor_path in self.file_list:
            label = os.path.basename(tensor_path).replace(".pt", "")
            try:
                tensor = torch.load(tensor_path, map_location="cpu")
                n_chunks = tensor.shape[0]
                for i in range(n_chunks):
                    self.index_list.append((tensor_path, label, i))
            except Exception as e:
                print(f"Fehler beim Laden von {tensor_path}: {e}")

    def __len__(self):
        return len(self.index_list)

    def __getitem__(self, idx):
        tensor_path, label_name, chunk_idx = self.index_list[idx]
        try:
            all_chunks = torch.load(tensor_path, map_location="cpu")
            chunk = all_chunks[chunk_idx]
        except Exception as e:
            print(f"Fehler beim Laden von Chunk {chunk_idx} für Label {label_name}: {e}")
            chunk = torch.zeros((1, 128, 216))
        label_tensor = torch.zeros(len(self.label_to_idx), dtype=torch.float32)
        if label_name in self.label_to_idx:
            label_index = self.label_to_idx[label_name]
            label_tensor[label_index] = 1.0
            
        return chunk, label_tensor

# DataLoader for ChunkSpectogramDataset

In [1]:
import os
print(os.listdir("./data/processed_train_audio/"))

['crbtan1', '48124', '476537', '66016', '42087', 'crcwoo1', 'blcant4', '787625', '24292', '21116', '46010', 'compau', 'gybmar', '50186', 'brtpar1', 'whwswa1', '52884', '868458', 'royfly1', 'cinbec1', '963335', '476538', 'leagre', 'greibi1', 'ampkin1', 'plukit1', 'greani1', 'savhaw1', '22333', 'rosspo1', 'yelori1', 'recwoo1', 'rutjac1', '41970', 'baymac', 'butsal1', '555142', 'grnkin', '21038', '41778', 'cotfly1', 'yebfly1', 'bafibi1', 'amakin1', '548639', 'greegr', '66531', 'blbgra1', 'norscr1', 'spepar1', 'y00678', '24322', 'smbani', '1139490', '65349', 'watjac1', '65962', '21211', 'laufal1', '67252', '65336', 'strcuc1', '66578', 'spbwoo1', 'amekes', 'whttro1', 'trokin', 'yehbla2', 'blkvul', 'grekis', 'ywcpar', 'sahpar1', '134933', 'fotfly', 'strfly1', '42113', 'speowl1', 'gohman1', '566513', 'blcjay1', '715170', 'rtlhum', 'bucmot3', 'chbant1', '47067', 'stbwoo2', '135045', 'whtdov', 'sobtyr1', 'turvul', 'piwtyr1', 'cregua1', 'whbman1', '1462711', '22973', 'rugdov', 'yehcar1', 'cargra

In [11]:
import pandas as pd
metadata = pd.read_csv("./data/processed_data.csv")
unique_labels = sorted(metadata["primary_label"].astype(str).unique())
label_to_idx = {label: idx for idx,label in enumerate(unique_labels)}
print(len(label_to_idx.keys()))

tensor_dir = "./data/processed_train_audio/"

206


In [35]:
import torch

# Load the tensor from file
tensor = torch.load('./data/processed_train_audio/21038/iNat65519.pt')
# Print the shape
print(tensor.shape)


torch.Size([151, 1, 128, 801])


In [42]:
import os 
import random 
label_to_idx = label_to_idx
tensor_dir = "./data/processed_train_audio/"
#all file paths
all_files = []
for root, _, files in os.walk(tensor_dir):
    for fname in files:
        if fname.endswith(".pt"):
            all_files.append(os.path.join(root, fname))

random.shuffle(all_files)
split_idx = int(0.8 * len(all_files))
train_files = all_files[split_idx:]
test_files = all_files[:split_idx]


train_dataset = inoChunkedSpectrogramDataset(train_files, label_to_idx)
test_dataset = inoChunkedSpectrogramDataset(test_files, label_to_idx)

# DATALOADER

In [43]:
train_loader = DataLoader(train_dataset,batch_size=BATCH_SIZE, shuffle=True)

In [44]:
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Train Loop inoChunkedSpectogramDataset

In [53]:
loss_fn = LOSS
for epoch in range(EPOCHS):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()

    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            test_loss += loss_fn(output, target).item()

            # Multi-label prediction: sigmoid + threshold
            pred = (torch.sigmoid(output) > 0.5).float()

            # Count sample-wise full matches (optional: can do partial matching if needed)
            correct += (pred == target).all(dim=1).sum().item()

    test_loss /= len(test_loader)

    print("\nEpoch: {}, Test loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
        epoch, test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)
    ))


Epoch: 0, Test loss: 0.0000, Accuracy: 290186/290186 (100%)


Epoch: 1, Test loss: 0.0000, Accuracy: 290186/290186 (100%)



KeyboardInterrupt: 

# Map von id to name für die Visualisierung

In [10]:
import pandas as pd

taxonomy_df = pd.read_csv("./data/taxonomy.csv")

id_to_name = {}
for index, row in taxonomy_df.iterrows():
    primary_label = row["primary_label"]
    common_name = row["common_name"] 
    id_to_name[primary_label] = common_name

#für die visualisierung später 

print(list(id_to_name.items())[:5])

[('1139490', 'Ragoniella pulchella'), ('1192948', 'Oxyprora surinamensis'), ('1194042', 'Copiphora colombiae'), ('126247', 'Spotted Foam-nest Frog'), ('1346504', 'Neoconocephalus brachypterus')]
