In [None]:
import numpy as np
import matplotlib.pyplot as plt
import random

import keras
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report,confusion_matrix
from keras.callbacks import ReduceLROnPlateau

import medmnist
from medmnist import INFO

import sys
sys.path.insert(0,"../")

import dataset_without_pytorch

from functions import split_data, get_label_counts, build_basic_model, argmax_array, generate_training_mixup_images
from plotting_functions import plot_history
import pandas as pd

In [None]:
# Dataset selection: the MedMNIST flag and the resolution passed to the loader as `size`.
data_flag = 'pneumoniamnist'
image_size = 128

# Forwarded to the dataset class as its `download` argument.
download = True

# Look up the metadata entry for the chosen flag, then resolve the loader class
# it names from the PyTorch-free dataset module.
info = INFO[data_flag]
DataClass = getattr(dataset_without_pytorch, info['python_class'])

In [None]:
# Instantiate the three dataset splits with identical settings.
# Construction order (train, test, val) matches the assignment order on the left.
train_dataset, test_dataset, val_dataset = (
    DataClass(split=split_name, download=download, size=image_size)
    for split_name in ('train', 'test', 'val')
)

In [None]:
RANDOM_SEED = 1

# Turn the three loaded datasets into train / validation arrays plus two test
# sets (A and B), with labels one-hot encoded over the 2 classes.
# `image_size` comes from the configuration cell rather than a repeated literal,
# so it cannot drift from the resolution the datasets were loaded at.
# NOTE(review): how split_data derives test sets A and B (and how it uses the
# seed) is not visible in this notebook — confirm in the `functions` module.
X_train, y_train, X_val, y_val, X_test_A, y_test_A, X_test_B, y_test_B = split_data(
    train_dataset,
    test_dataset,
    val_dataset,
    RANDOM_SEED=RANDOM_SEED,
    one_hot_encoded=True,
    num_classes=2,
    image_size=image_size,
)


In [None]:
# Sanity check: inspect the label counts of every split to confirm that the
# expected test data was produced.
get_label_counts(
    y_train,
    y_val,
    y_test_A,
    y_test_B,
    class_names=["Normal", "Pneumonia"],
)

In [None]:
# Index table for test set A: one row per image, holding its position in
# X_test_A / y_test_A and the second component of its one-hot label
# (0 if normal, 1 if pneumonia).
test_set_A_df = pd.DataFrame(
    {
        "image_index": list(range(len(y_test_A))),
        "class": [one_hot[1] for one_hot in y_test_A],
    }
)

In [None]:
MUBA_ITERS = 30

# Dedicated generator seeded from RANDOM_SEED so the sampled alphas (and hence
# the saved mixup set) are reproducible after a kernel restart, without
# touching NumPy's global random state.
muba_rng = np.random.default_rng(RANDOM_SEED)

# Columns of the resulting frame. These are exactly the keys written for every
# record below; the previously declared "image_0"/"image_1" columns were never
# populated (the records use "class_0_index"/"class_1_index") and only produced
# two all-NaN columns.
muba_columns = [
    "alpha_class_0",
    "alpha_class_1",
    "class_0_index",
    "class_1_index",
    "mixup_image",
    "type",
    "label",
]

# Filter each class once instead of re-filtering class 1 for every class-0 image.
class_0_rows = test_set_A_df[test_set_A_df["class"] == 0]
class_1_rows = test_set_A_df[test_set_A_df["class"] == 1]

# Collect plain dict records and build the DataFrame once at the end; growing a
# DataFrame with pd.concat inside the loop copies the whole frame on every
# iteration (quadratic time and memory churn).
muba_records = []
for position_0, (index_0, image_index_0) in enumerate(class_0_rows["image_index"].items(), start=1):
    # One progress line per class-0 image instead of one per generated sample.
    print(f"class-0 image {index_0} ({position_0}/{len(class_0_rows)})")
    image_0 = X_test_A[int(image_index_0)]

    for index_1, image_index_1 in class_1_rows["image_index"].items():
        image_1 = X_test_A[int(image_index_1)]

        for i in range(MUBA_ITERS):
            # Stratified sampling: draw alpha uniformly inside the i-th bin of
            # width 1/MUBA_ITERS, so the MUBA_ITERS samples cover [0, 1).
            alpha = (i + muba_rng.random()) / MUBA_ITERS

            # alpha weights the class-0 image, (1 - alpha) the class-1 image.
            new_img = alpha * image_0 + (1 - alpha) * image_1

            # The mix is labelled with whichever class contributes more;
            # alpha of exactly 0.5 falls to class 1.
            label = 0 if alpha > 0.5 else 1

            muba_records.append({
                "alpha_class_0": alpha,
                "alpha_class_1": 1 - alpha,
                "class_0_index": int(index_0),
                "class_1_index": int(index_1),
                "mixup_image": new_img,
                "type": "mix",
                "label": label,
            })

# Passing `columns` keeps the frame well-formed (empty, but with the expected
# columns) even if one of the classes has no images.
muba_df = pd.DataFrame(muba_records, columns=muba_columns)

In [None]:
# Preview only the first generated row; each row holds a full image array.
muba_df.iloc[:1]

In [None]:
# Persist the mixup set. NOTE: despite the ".csv" extension this file is written
# in pickle format, so it must be read back with pd.read_pickle, not pd.read_csv.
pd.to_pickle(muba_df, "./mixup_images.csv")