# MixUp augmentation in Albumentations

In version [1.4.1](https://github.com/albumentations-team/albumentations/releases/tag/1.4.1), released on 4 March 2024, we added the MixUp transform.

(It will take some time for the Kaggle Docker image to be updated to that version, so the library is upgraded with `pip` below.)

Here is an example of how to apply it in this competition.

In [None]:
%matplotlib inline

In [None]:
from pylab import *
from pathlib import Path
import numpy as np
import pandas as pd

In [None]:
import torch
import torchaudio

In [None]:
!pip install -U albumentations

In [None]:
import albumentations as A

In [None]:
def spectrogram_from_eeg4(parquet_path: Path) -> np.ndarray:
    """Build a 4-channel mel-spectrogram image from one EEG parquet file.

    Each output channel corresponds to one electrode chain in ``FEATS``. For a
    chain, the four differences between consecutive electrodes are each turned
    into a dB-scaled mel spectrogram, and the four spectrograms are averaged.

    Args:
        parquet_path: Path to a parquet file that has one column for every
            electrode name listed in ``FEATS``.

    Returns:
        A float32 array of shape ``(128, width, 4)`` where ``width`` is at most
        ``len(eeg) // 100``. The mel axis is reversed so that low frequencies
        end up at the bottom of the image.

    Raises:
        ZeroDivisionError: If the file has fewer than 100 rows, because the
            hop length is derived by dividing by a zero image width.
    """
    FEATS = [
        ["Fp1", "F7", "T3", "T5", "O1"],
        ["Fp1", "F3", "C3", "P3", "O1"],
        ["Fp2", "F8", "T4", "T6", "O2"],
        ["Fp2", "F4", "C4", "P4", "O2"],
    ]
    n_mels = 128

    # Load the entire EEG series
    eeg = pd.read_parquet(parquet_path)
    n_samples = len(eeg)

    # One spectrogram column per 100 samples (hop_length = sample_rate / 2
    # for the 200 Hz sample rate passed to MelSpectrogram below).
    max_width = n_samples // (200 // 2)

    # The transforms depend only on the recording length, so build them once
    # instead of once per electrode pair.
    mel_spectrogram = torchaudio.transforms.MelSpectrogram(
        sample_rate=200,
        n_fft=1024,
        win_length=128,
        hop_length=n_samples // max_width,
        n_mels=n_mels,
        f_min=0,
        f_max=20,
        power=2.0,
    )
    amplitude_to_db = torchaudio.transforms.AmplitudeToDB(stype="power")

    # The width (second dimension) varies with the EEG length.
    img = np.zeros((n_mels, max_width, len(FEATS)), dtype="float32")

    # Defined up front so the averaging step and the return never rely on a
    # variable that only exists once the inner loop has run.
    width = max_width

    for k, cols in enumerate(FEATS):
        for left, right in zip(cols[:-1], cols[1:]):
            # Difference between neighbouring electrodes of the chain
            x = eeg[left].to_numpy() - eeg[right].to_numpy()

            # Fill NaNs with the mean of the valid samples. A signal that is
            # entirely NaN has no mean, so fall back to zeros rather than
            # propagating NaN into the whole channel.
            missing = np.isnan(x)
            if missing.all():
                x = np.zeros_like(x)
            elif missing.any():
                x = np.where(missing, np.nanmean(x), x)

            # Convert to tensor and add a batch dimension
            x_tensor = torch.tensor(x, dtype=torch.float32).unsqueeze(0)

            # Power mel spectrogram -> dB scale; drop the batch dimension
            mel_spec_db = amplitude_to_db(mel_spectrogram(x_tensor)).numpy()[0]

            # Shift and scale the dB values
            mel_spec_db = (mel_spec_db + 40) / 40

            # Never write more columns than were allocated
            width = min(mel_spec_db.shape[1], max_width)
            img[:, :width, k] += mel_spec_db[:, :width]

        # Average the pairwise differences of this chain
        img[:, :width, k] /= float(len(cols) - 1)

    # Reverse the frequency axis so low frequencies are at the bottom of the image
    return img[::-1, :width, :]

In [None]:
# Root directory of the competition dataset (Kaggle input mount).
DATA_PATH = Path("/kaggle/input/hms-harmful-brain-activity-classification")

In [None]:
# Training metadata; the cells below use its `eeg_id` column and the columns
# whose names contain "_vote".
train_df = pd.read_csv(DATA_PATH / "train.csv")

In [None]:
# Columns whose names contain "_vote".
label_columns = train_df.filter(like="_vote").columns.to_list()

# Sum the votes of all rows sharing an eeg_id, then divide every row by its
# total so that each eeg_id gets a label vector that sums to 1.
data = train_df.groupby(["eeg_id"])[label_columns].sum()
n = data.sum(axis=1)
data = data.div(n, axis=0)

In [None]:
# Store the normalized label vectors as float32.
data = data.astype(np.float32)
# `data` is indexed by eeg_id (the groupby key), so this is one id per row.
eeg_ids = data.index.to_numpy()

## Define target image

In [None]:
# Use the first eeg_id as the target sample: its spectrogram image ...
img = spectrogram_from_eeg4(DATA_PATH / "train_eegs" / f"{eeg_ids[0]}.parquet")
# ... and its normalized label vector as a NumPy array.
global_label = data.loc[eeg_ids[0]].to_numpy()

In [None]:
# Inspect the shape and value range of the target spectrogram.
img.shape, img.min(), img.max()

In [None]:
# Show the target label and the first channel of the target spectrogram.
print("Global label = ", global_label)
plt.imshow(img[:, :, 0])

If we use float32 images for MixUp, their values should be within the [0, 1] range.

In [None]:
def normalize_image(image):
    """Linearly rescale an array to the [0, 1] range (min-max normalization).

    Args:
        image: NumPy array to rescale.

    Returns:
        Array of the same shape in which the minimum of ``image`` maps to 0
        and the maximum maps to 1. A constant image maps to all zeros.
    """
    min_value = image.min()
    value_range = image.max() - min_value
    if value_range == 0:
        # Constant image: 0 / 0 would fill the result with NaN, so divide by 1
        # instead, which leaves the (all-zero) shifted image unchanged.
        value_range = 1
    return (image - min_value) / value_range

For mixing, we need to define:
* `reference_data`, which is a sequence (for example, a list) or a generator
* `read_fn`, a function that takes an element of `reference_data` as input and returns a dictionary of the form

```python
{
    "image": np.ndarray,
    "mask": Optional[np.ndarray],
    "global_label": Optional[np.ndarray],
}
```
Here `global_label` is a 1D vector representing the label of the image. For classification, it is the one-hot representation of the target (in this notebook, a vector of normalized vote fractions).


In `read_fn`, we prepare the images used for mixing, and we can apply a separate [Albumentations](https://albumentations.ai/) pipeline to them.

In [None]:
# Crop size passed to the RandomCrop transforms below, for both the
# reference images and the target image.
target_height = 128
target_width = 100

In [None]:
# Pipeline applied to reference images inside `read_fn`: always (p=1) take a
# random crop of target_height x target_width.
transform_reference_data = A.Compose([A.RandomCrop(height=target_height, width=target_width, p=1)])

In [None]:
# Preview the first ten ids; these are the elements passed to `read_fn`.
eeg_ids[:10]

In [None]:
def read_fn(eeg_id):
    """Load one reference sample for MixUp.

    Builds the spectrogram for ``eeg_id``, rescales it with
    ``normalize_image``, applies ``transform_reference_data`` to it and looks
    up the matching label row in ``data``.

    Args:
        eeg_id: One element of the reference data; used both as the parquet
            file name and as the index key into ``data``.

    Returns:
        A dictionary with the processed spectrogram under ``"image"`` and the
        label vector under ``"global_label"``.
    """
    parquet_path = DATA_PATH / "train_eegs" / f"{eeg_id}.parquet"
    spectrogram = normalize_image(spectrogram_from_eeg4(parquet_path))
    cropped = transform_reference_data(image=spectrogram)["image"]

    return {
        "image": cropped,
        "global_label": data.loc[eeg_id].to_numpy(),
    }

In [None]:
# Pipeline for the target image: crop it to the same size as the reference
# images, then mix it with a reference sample loaded through `read_fn`.
transform = A.Compose(
    [
        A.RandomCrop(height=target_height, width=target_width, p=1),
        A.MixUp(reference_data=list(eeg_ids), read_fn=read_fn, alpha=0.4, p=1),
    ]
)

In [None]:
# Run the pipeline on the normalized target image together with its label.
transformed = transform(image=normalize_image(img), global_label=global_label)

# Show the label returned by the pipeline and the first channel of the result.
print("Global_label = ", transformed["global_label"])
plt.imshow(transformed["image"][:, :, 0])