## Create custom dataset
<https://pytorch.org/tutorials/beginner/basics/data_tutorial.html>

In [None]:
import torchvision.io
from sklearn.preprocessing import LabelEncoder
import librosa
from birdclassification.preprocessing.filtering import filter_recordings_30
from torch.utils.data import Dataset
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

class CustomAudioDataset(Dataset):
    """Map-style dataset that yields ``(waveform, label)`` pairs.

    Each row of the input DataFrame describes one recording. The file is
    expected at ``{recording_dir}{Latin name}/{id}.mp3`` and is decoded
    lazily in ``__getitem__``.

    NOTE: the label encoder is fitted on the DataFrame passed to this
    instance only, so integer labels are comparable between two datasets
    only when both frames contain the same set of 'Latin name' values.
    """

    def __init__(self, df, recording_dir, transform=None, target_transform=None):
        """
        Parameters
        ----------
        df: pd.DataFrame
            DataFrame of xeno-canto recordings; the columns 'Latin name'
            and 'id' are read. The frame itself is left unmodified.
        recording_dir: str
            filepath to directory with recordings. It is concatenated
            as-is in front of the relative path, so it must end with a
            path separator.
        transform: callable, optional
            Applied to the waveform tensor before it is returned.
        target_transform: callable, optional
            Applied to the label tensor before it is returned.
        """
        # Build the derived values without writing new columns into ``df``:
        # the caller usually passes a slice produced by train_test_split and
        # should not see it change.
        filepaths = df.apply(
            lambda x: f"{recording_dir}{x['Latin name']}/{str(x['id'])}.mp3",
            axis=1,
        )
        le = LabelEncoder()
        labels = le.fit_transform(df['Latin name'])

        self.filepath = filepaths.to_numpy()
        self.label = labels
        self.recording_dir = recording_dir
        self.transform = transform
        self.target_transform = target_transform
        # Species name -> integer class id, for decoding predictions later.
        self.le_name_mapping = dict(zip(le.classes_, le.transform(le.classes_)))

    @staticmethod
    def _label_to_tensor(label):
        """Convert one integer class id into a scalar ``torch.long`` tensor."""
        # int64 holds any number of classes (int8 stops at 127) and is the
        # dtype PyTorch classification losses expect for class-index targets.
        return torch.tensor(int(label), dtype=torch.long)

    def __len__(self):
        """Return the number of recordings in the dataset."""
        return len(self.filepath)

    def __getitem__(self, idx):
        """Load recording ``idx`` and return ``(audio, label)`` tensors.

        The file is decoded with ``librosa.load`` using its default
        arguments; the returned sample rate is discarded.
        """
        audio, _ = librosa.load(self.filepath[idx])

        audio = torch.from_numpy(audio).type(torch.float32)
        label = self._label_to_tensor(self.label[idx])

        # Explicit None checks: a transform object may define __len__/__bool__
        # and evaluate as falsy even though it was supplied.
        if self.transform is not None:
            audio = self.transform(audio)
        if self.target_transform is not None:
            label = self.target_transform(label)

        return audio, label


## Split dataset

In [None]:
# Root directory of the downloaded recordings (trailing slash required,
# because CustomAudioDataset concatenates it directly with the relative path).
RECORDINGS_DIR = '/media/jacek/E753-A120/recordings_30/'

df = filter_recordings_30()

# Stratified 80 / 10 / 10 split on the species name: first hold out 20 %,
# then cut that hold-out in half to obtain validation and test frames.
train_df, test_val_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df['Latin name'],
)
val_df, test_df = train_test_split(
    test_val_df,
    test_size=0.5,
    stratify=test_val_df['Latin name'],
)

# One dataset per split, built in train -> val -> test order.
train_ds, val_ds, test_ds = (
    CustomAudioDataset(split_df, recording_dir=RECORDINGS_DIR)
    for split_df in (train_df, val_df, test_df)
)

In [None]:
# Report how many recordings ended up in each split (train, val, test).
print(len(train_ds))
print(len(val_ds))
print(len(test_ds))

In [None]:
# Sanity check: decode and print the first five (audio, label) pairs.
for i in range(5):
    sample = train_ds[i]
    print(sample)


## DataLoader

In [None]:
# All three loaders share the same batching settings.
# NOTE(review): no collate_fn is given, so PyTorch's default collation is
# used; it stacks samples and therefore presumably needs every waveform in a
# batch to have the same length - confirm the recordings are uniform, or pad.
loader_kwargs = dict(batch_size=64, shuffle=True)

train_dataloader = DataLoader(train_ds, **loader_kwargs)
val_dataloader = DataLoader(val_ds, **loader_kwargs)
test_dataloader = DataLoader(test_ds, **loader_kwargs)

In [None]:
# Walk through the complete training loader once and print every batch it
# yields. This decodes every training recording, so it can take a while.
for element in train_dataloader:
    print(element)

## Example of custom transform on audio

In [None]:
import torchaudio
from torchaudio.transforms import Resample, TimeStretch, Spectrogram, FrequencyMasking, TimeMasking, MelScale, AddNoise
from birdclassification.visualization.plots import plot_waveform

class MyPipeline(torch.nn.Module):
    """Example waveform pre-processing / augmentation pipeline.

    Only the resampling stage is active in ``forward``. The spectrogram,
    SpecAugment, noise and mel-scale modules are constructed so that they
    can be enabled later, but they are not applied yet.

    Parameters
    ----------
    input_freq: int
        Sample rate of the incoming waveform.
    resample_freq: int
        Sample rate the waveform is converted to.
    n_fft: int
        FFT size of the spectrogram stage.
    n_mel: int
        Number of mel filter banks of the mel-scale stage.
    stretch_factor: float
        Fixed rate used by the time-stretch augmentation.
    """

    def __init__(
        self,
        input_freq=16000,
        resample_freq=8000,
        n_fft=1024,
        n_mel=256,
        stretch_factor=0.8,
    ):
        super().__init__()

        # Number of frequency bins produced by an n_fft-point STFT.
        n_freq = n_fft // 2 + 1

        self.resample = Resample(orig_freq=input_freq, new_freq=resample_freq)
        self.spec = Spectrogram(n_fft=n_fft, power=2)
        self.spec_aug = torch.nn.Sequential(
            # TimeStretch's first positional parameter is hop_length, so the
            # stretch factor has to be passed as fixed_rate, and n_freq must
            # match the spectrogram (its default of 201 fits n_fft=400 only).
            TimeStretch(n_freq=n_freq, fixed_rate=stretch_factor),
            FrequencyMasking(freq_mask_param=80),
            TimeMasking(time_mask_param=80),
        )
        # AddNoise.forward needs (waveform, noise, snr), so it cannot be a
        # step of nn.Sequential, which passes a single tensor along; keep it
        # as a separate module to be called explicitly on waveforms.
        self.add_noise = AddNoise()
        self.mel_scale = MelScale(
            n_mels=n_mel, sample_rate=resample_freq, n_stft=n_freq)

    def forward(self, waveform: torch.Tensor) -> torch.Tensor:
        """Resample ``waveform`` from ``input_freq`` to ``resample_freq``."""
        # Resample the input
        resampled = self.resample(waveform)

        # Remaining stages are disabled for now:
        # NOTE(review): the torchaudio docs describe TimeStretch's input as a
        # complex-valued spectrogram, while self.spec uses power=2 (real
        # valued) - confirm and adjust before enabling the lines below.
        # # Convert to power spectrogram
        # spec = self.spec(resampled)
        #
        # # Apply SpecAugment
        # spec = self.spec_aug(spec)
        #
        # # Convert to mel-scale
        # mel = self.mel_scale(spec)
        return resampled

In [None]:
# Instantiate a pipeline.
# CustomAudioDataset decodes files with librosa.load's default settings, which
# resample to 22050 Hz, so the pipeline must be told that input rate instead
# of relying on MyPipeline's 16000 Hz default.
pipeline = MyPipeline(input_freq=22050)
# # Move the computation graph to CUDA
# pipeline.to(device=torch.device("cuda"), dtype=torch.float32)
audio, label = train_ds[3]
# NOTE(review): the second argument is presumably a sample rate; 20000 is
# kept from the original call - confirm against plot_waveform's signature
# (the loaded audio is at 22050 Hz).
plot_waveform(audio, 20000)

In [None]:
# Run the sample through the pipeline (currently: resampling only).
features = pipeline(audio)
# Plot the processed waveform. The original call passed no arguments at all,
# although the earlier call supplies a tensor and a number.
# NOTE(review): the second argument is assumed to be a sample rate, so the
# pipeline's output rate is passed - confirm against plot_waveform's signature.
plot_waveform(features, pipeline.resample.new_freq)