# Imports

In [3]:
import torch
import torch.nn as nn                # für nn layers
import torch.nn.functional as F       
import torch.optim as optim            
from torch.utils.data import Dataset, DataLoader 

#
import torchaudio
import torchaudio.transforms as T   
import numpy as np                     
import pandas as pd                   

import matplotlib.pyplot as plt

import os
from collections import OrderedDict, defaultdict

# Dataset Creation

In [7]:
class SpectogramDataset(Dataset):
    """Chunk-level dataset over tensor files laid out as ``<audio_dir>/<label>/<file>``.

    Each file is loaded with ``torch.load`` and its first dimension is treated
    as the chunk axis; every chunk becomes one sample. Recently used files are
    kept in a small LRU cache so consecutive reads from the same file do not
    hit the disk again.

    Args:
        audio_dir: Root folder containing one sub-folder per label.
        label_to_idx: Mapping from label (sub-folder name) to class index.
        max_cache_size: Maximum number of loaded files kept in memory.

    Attributes:
        chunk_index_pairs: List of ``(file_path, label, chunk_index)`` tuples,
            one per sample.
        path_to_label: Maps each label to the list of file paths found for it
            (the name is historical; keys are labels, values are paths).
        cache: ``OrderedDict`` of ``file_path -> tensor`` in LRU order.
    """

    def __init__(self, audio_dir: str, label_to_idx: dict, max_cache_size: int = 5):
        self.label_to_idx = label_to_idx
        self.audio_dir = audio_dir
        self.chunk_index_pairs = []
        self.cache = OrderedDict()
        self.path_to_label = defaultdict(list)
        self.max_cache_size = max_cache_size

        # Sorted listings make the sample order reproducible across platforms.
        for label in sorted(os.listdir(self.audio_dir)):
            label_dir = os.path.join(self.audio_dir, label)
            if not os.path.isdir(label_dir):
                # Stray files next to the label folders (e.g. .DS_Store) are ignored.
                continue
            for file in sorted(os.listdir(label_dir)):
                # Build the path from audio_dir so the index always points at
                # the files that were actually scanned.
                file_path = os.path.join(label_dir, file)
                # The file is loaded once here only to learn how many chunks it holds.
                amount_of_chunks = torch.load(file_path).shape[0]
                self.path_to_label[label].append(file_path)
                self.chunk_index_pairs.extend(
                    (file_path, label, n) for n in range(amount_of_chunks)
                )

    def load_cached_tensor(self, file_path):
        """Return the tensor stored at ``file_path``, using the LRU cache.

        A hit marks the entry as most recently used. A miss loads the file,
        stores it and evicts the least recently used entry if the cache has
        grown beyond ``max_cache_size``.
        """
        if file_path in self.cache:
            self.cache.move_to_end(file_path)
            return self.cache[file_path]

        tensor = torch.load(file_path)
        self.cache[file_path] = tensor
        if len(self.cache) > self.max_cache_size:
            self.cache.popitem(last=False)
        # Return the local reference: with max_cache_size == 0 the entry that
        # was just inserted has already been evicted again.
        return tensor

    def __len__(self):
        """Total number of chunks across all indexed files."""
        return len(self.chunk_index_pairs)

    def __getitem__(self, idx: int):
        """Return ``(chunk, class_index)`` for sample ``idx``.

        Raises:
            KeyError: If the sample's label is missing from ``label_to_idx``.
        """
        file_path, label, chunk_index = self.chunk_index_pairs[idx]
        tensor = self.load_cached_tensor(file_path)
        chunk = tensor[chunk_index]

        return chunk, self.label_to_idx[label]

In [5]:
import pandas as pd

# Class vocabulary: every distinct primary label, cast to str and sorted.
metadata = pd.read_csv("./data/processed_data.csv")
unique_labels = sorted(metadata["primary_label"].astype(str).unique())

# Lookups in both directions between a label string and its class index.
index_to_label = dict(enumerate(unique_labels))
label_to_index = {label: idx for idx, label in index_to_label.items()}

print(unique_labels[:10])
# NOTE: the labels are strings, so the order is lexicographic ('10' sorts
# before '2'); sorting numerically would be possible as well.

['0', '1', '10', '100', '101', '102', '103', '104', '105', '106']


## Create Data-Loader

In [6]:
%%time

# Root folder that holds one sub-folder of tensor files per label.
audio_dir = "./data/processed_train_audio"

# Constructing the dataset loads every tensor file once to count its chunks,
# which is what makes this cell slow.
train_dataset = SpectogramDataset(audio_dir=audio_dir, label_to_idx=label_to_index)

# Shuffled mini-batches of 64 chunks for training.
training_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

CPU times: user 1.98 s, sys: 20.7 s, total: 22.6 s
Wall time: 1min


# Base Model Architecture

In [None]:
import torch
import torch.nn as nn

class CNNClassifier(nn.Module):
    """Small two-stage CNN that maps an image-like tensor to class logits.

    Two ``Conv2d -> ReLU -> MaxPool2d(2)`` stages widen the channels to 16 and
    then 32 while dividing height and width by four. The feature map is then
    resampled to a fixed 32x32 grid so the classifier head accepts any input
    size; for 128x128 inputs that resampling is the identity.

    Args:
        num_classes: Number of output logits.
        in_channels: Channels of the input tensor. The default of 1 is only
            correct for single-channel (grayscale) inputs.
    """

    def __init__(self, num_classes, in_channels=1):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),

            # 16 input channels (output of the first stage), widened to 32.
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),

            # Parameter-free: pins the spatial size to 32x32 so the flattened
            # feature vector always has 32 * 32 * 32 entries.
            nn.AdaptiveAvgPool2d((32, 32)),
        )
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(32 * 32 * 32, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        """Return raw logits of shape ``(batch, num_classes)`` for input ``x``.

        ``x`` is expected as ``(batch, in_channels, height, width)``.
        """
        x = self.conv(x)
        x = self.fc(x)
        return x

# Backup Cells

In [8]:
import os
import torch
from torch.utils.data import Dataset

class ChunkedSpectrogramDataset(Dataset):
    """Chunk-level dataset over a flat folder of ``<label>.pt`` tensor files.

    Every ``.pt`` file in ``tensor_dir`` is named after its label, and the
    first dimension of the stored tensor is treated as the chunk axis. Samples
    are ``(chunk, one_hot_label)`` pairs.

    Loading is best-effort: files that cannot be read while indexing are
    reported and skipped, and a failing read in ``__getitem__`` is reported
    and replaced by an all-zero dummy chunk.

    Args:
        tensor_dir: Folder containing the ``<label>.pt`` files.
        label_to_idx: Mapping from label name to class index.
    """

    def __init__(self, tensor_dir: str, label_to_idx: dict):
        self.tensor_dir = tensor_dir
        self.label_to_idx = label_to_idx
        self.index_list = []

        # Build the index: [(label_name, chunk_idx), ...]
        suffix = ".pt"
        # Sorted listing keeps the sample order reproducible across platforms.
        for fname in sorted(os.listdir(tensor_dir)):
            if not fname.endswith(suffix):
                continue
            # Strip only the trailing extension; str.replace would also remove
            # ".pt" occurring inside the label itself.
            label = fname[: -len(suffix)]
            tensor_path = os.path.join(tensor_dir, fname)
            try:
                tensor = torch.load(tensor_path, map_location="cpu")
                n_chunks = tensor.shape[0]
            except Exception as e:
                print(f"Fehler beim Laden von {tensor_path}: {e}")
                continue
            self.index_list.extend((label, i) for i in range(n_chunks))

    def __len__(self):
        """Total number of indexed chunks."""
        return len(self.index_list)

    def __getitem__(self, idx):
        """Return ``(chunk, one_hot_label)`` for sample ``idx``.

        The label vector has ``len(label_to_idx)`` entries and stays all-zero
        when the file's label is not present in ``label_to_idx``.
        """
        label_name, chunk_idx = self.index_list[idx]
        tensor_path = os.path.join(self.tensor_dir, f"{label_name}.pt")

        try:
            # Load only the file that belongs to this label.
            all_chunks = torch.load(tensor_path, map_location="cpu")
            chunk = all_chunks[chunk_idx]
        except Exception as e:
            print(f"Fehler beim Laden von Chunk {chunk_idx} für Label {label_name}: {e}")
            # Dummy chunk; adjust the shape if the real chunks differ.
            chunk = torch.zeros((1, 128, 216))

        # One-hot label tensor.
        label_tensor = torch.zeros(len(self.label_to_idx), dtype=torch.float32)
        if label_name in self.label_to_idx:
            label_index = self.label_to_idx[label_name]
            label_tensor[label_index] = 1.0

        return chunk, label_tensor

In [10]:
import pandas as pd

taxonomy_df = pd.read_csv("./data/taxonomy.csv")

# Lookup from primary_label to common_name, used later for visualization.
# Pairing the two columns directly avoids the per-row Series that iterrows()
# builds; if a primary_label repeats, the last row wins, as it did in the loop.
id_to_name = dict(zip(taxonomy_df["primary_label"], taxonomy_df["common_name"]))

print(list(id_to_name.items())[:5])

[('1139490', 'Ragoniella pulchella'), ('1192948', 'Oxyprora surinamensis'), ('1194042', 'Copiphora colombiae'), ('126247', 'Spotted Foam-nest Frog'), ('1346504', 'Neoconocephalus brachypterus')]
