# IMPORT LIBRARIES

In [None]:
!pip install -U scipy

In [None]:
import numpy as np 
import pandas as pd 
import os
from glob import glob
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.model_selection import train_test_split


In [None]:
from keras.preprocessing.image import ImageDataGenerator

In [None]:
from skimage import img_as_float
from skimage import exposure
from skimage.restoration import denoise_tv_chambolle, denoise_bilateral, denoise_wavelet, estimate_sigma
from skimage.filters import threshold_multiotsu,threshold_otsu
from skimage.morphology import erosion, dilation, opening, closing,disk
from skimage.measure import label, regionprops,find_contours
from skimage.morphology import erosion, dilation, opening, closing,disk
import cv2

In [None]:
from tensorflow.keras.applications import MobileNet,VGG19,EfficientNetV2S
from keras.layers import GlobalAveragePooling2D, Dense, Dropout, Flatten,BatchNormalization,Input,Conv2D,MaxPooling2D,concatenate,Conv2DTranspose
from keras.models import Sequential


# LOAD DATA

In [None]:
# Read the label table, then attach the on-disk location of every image.
all_xray_df = pd.read_csv('../input/sample/sample_labels.csv')

# Index every PNG found under ../input/data/images*/<subdir>/ by its file name.
all_image_paths = {
    os.path.basename(png_path): png_path
    for png_path in glob(os.path.join('..', 'input', 'data', 'images*', '*', '*.png'))
}

# Rows whose 'Image Index' has no matching file get whatever dict.get
# returns for a missing key (None).
all_xray_df['path'] = all_xray_df['Image Index'].map(all_image_paths.get)
all_xray_df.head()

# VISUALIZATION

In [None]:
# One image can carry several findings, so 'Finding Labels' holds label
# combinations; count how often each distinct combination occurs.
label_counts = all_xray_df['Finding Labels'].value_counts()
label_counts

In [None]:
# Bar chart of the 2nd to 15th most frequent label combinations: the slice
# [1:15] skips the single most common entry and keeps the next 14.
label_counts = all_xray_df['Finding Labels'].value_counts()[1:15]
fig, ax1 = plt.subplots(1,1,figsize = (12, 8))
ax1.bar(np.arange(len(label_counts))+0.5, label_counts)
ax1.set_xticks(np.arange(len(label_counts))+0.5)
_ = ax1.set_xticklabels(label_counts.index, rotation = 90)

=> The dataset is imbalanced.

# DATA PROCESSING

## One-hot

In [None]:
# The 14 pathology classes (list taken from the paper).
all_labels = [
    'Atelectasis', 'Consolidation', 'Infiltration', 'Pneumothorax', 'Edema',
    'Emphysema', 'Fibrosis', 'Effusion', 'Pneumonia', 'Pleural_Thickening',
    'Cardiomegaly', 'Nodule', 'Mass', 'Hernia',
]

# One indicator column per class: 1.0 when the class name occurs in the
# row's 'Finding Labels' string, 0 otherwise.
for lb in all_labels:
    all_xray_df[lb] = [
        1.0 if lb in result else 0
        for result in all_xray_df['Finding Labels']
    ]
all_xray_df.head(20)


## Train test split

In [None]:
# Draw 5000 rows by weighted sampling: a row's weight is the number of
# '|'-separated labels it carries plus a small 4e-2 offset, normalised so
# that all weights sum to 1.
sample_weights = all_xray_df['Finding Labels'].map(lambda x: len(x.split('|')) if len(x)>0 else 0).values + 4e-2
sample_weights /= sample_weights.sum()
all_xray_df = all_xray_df.sample(5000, weights=sample_weights)


In [None]:
# 80/20 split, stratified on the first four characters of 'Finding Labels'
# so both parts see a similar mix of leading findings.
train_df, valid_df = train_test_split(
    all_xray_df,
    test_size=0.2,
    stratify=all_xray_df['Finding Labels'].map(lambda findings: findings[:4]),
)
print(f"train {train_df.shape[0]} validation {valid_df.shape[0]}")

In [None]:
# Preview the first rows of the training split.
train_df.head()

# Data Generator

In [None]:
# Side length in pixels that images are resized to when they are loaded.
IMAGE_SIZE = 256  # 128 is noted as an alternative value

In [None]:
# Mirror the model across all visible devices and report how many there are.
strategy = tf.distribute.MirroredStrategy()
print(f'DEVICES AVAILABLE: {strategy.num_replicas_in_sync}')

In [None]:
AUTOTUNE = tf.data.AUTOTUNE

BATCH_SIZE_PER_REPLICA = 32

# The global BATCH_SIZE is the per-replica batch size multiplied by the
# number of devices (replicas) in the distribution strategy.
BATCH_SIZE = BATCH_SIZE_PER_REPLICA*strategy.num_replicas_in_sync


In [None]:
# Image generator whose only configured augmentation is a random
# horizontal flip.
core_idg = ImageDataGenerator(horizontal_flip=True,
                             )

In [None]:
# Options shared by both iterators: file paths come from the 'path' column,
# targets are the one-hot label columns, and every image is loaded as a
# single-channel IMAGE_SIZE x IMAGE_SIZE array.
flow_kwargs = dict(
    directory=None,
    x_col='path',
    y_col=all_labels,
    class_mode='raw',
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    color_mode='grayscale',
)

# Training batches: augmented (random horizontal flip) and shuffled.
train_generator = core_idg.flow_from_dataframe(
    dataframe=train_df,
    batch_size=BATCH_SIZE,
    shuffle=True,
    **flow_kwargs,
)

# Validation batches must be neither augmented nor reshuffled, otherwise two
# evaluations of the same weights can give different numbers. Use a plain
# generator (no flip) and a fixed order instead of `core_idg`.
valid_generator = ImageDataGenerator().flow_from_dataframe(
    dataframe=valid_df,
    batch_size=256,
    shuffle=False,
    **flow_kwargs,
)

In [None]:
# Share of training images (in percent) that are positive for each condition.
condition_pct = train_generator.labels.sum(axis=0) / train_generator.n * 100

plt.figure(figsize=(8, 4))
plt.xticks(rotation=90)
plt.bar(all_labels, condition_pct)
plt.title('Percentage of different conditions in train dataset')
plt.xlabel('Conditions')
plt.ylabel('Percentage')
plt.show()


## Convert to TF dataset

In [None]:
# Both datasets yield (images, labels) batches with the same element spec:
# float32 images of shape [batch, IMAGE_SIZE, IMAGE_SIZE, 1] and float32
# label rows with one entry per class.
generator_spec = dict(
    output_types=(tf.float32, tf.float32),
    output_shapes=([None, IMAGE_SIZE, IMAGE_SIZE, 1], [None, len(all_labels)]),
)

raw_train_data = tf.data.Dataset.from_generator(lambda: train_generator, **generator_spec)
raw_valid_data = tf.data.Dataset.from_generator(lambda: valid_generator, **generator_spec)

In [None]:
# Display the dataset object as the cell output.
raw_train_data

In [None]:
# Pull a single (images, labels) batch from the training dataset.
one_batch_samples = next(iter(raw_train_data))

In [None]:
# Split the sampled batch into its image and label tensors.
image_samples, label_samples = one_batch_samples

In [None]:
# Show the first 16 images of the sampled batch in a 4x4 grid, each titled
# with the names of the classes whose label equals 1.0.
fig, axes = plt.subplots(4, 4, figsize=(16, 16))
label_names = np.array(all_labels)
for img, lb, ax in zip(image_samples, label_samples, axes.flatten()):
    positives = label_names[lb.numpy() == 1.0]
    ax.imshow(img, cmap='bone')
    ax.set_title(', '.join(positives))
    ax.axis('off')

## RESCALE

In [None]:
# Layer that multiplies its input by 1/255.
normalization_layer = tf.keras.layers.Rescaling(1./255)


In [None]:
def _rescale_images(x, y):
    """Run the images through `normalization_layer`; labels pass through."""
    return normalization_layer(x), y


scale_train_data = raw_train_data.map(_rescale_images, num_parallel_calls=AUTOTUNE)
scale_valid_data = raw_valid_data.map(_rescale_images, num_parallel_calls=AUTOTUNE)

## BACKGROUND REMOVAL

In [None]:
def background_rm(image):
    """Crop every image of a batch to its non-empty bounding box.

    For each image, the rows and columns whose pixel sum is greater than
    zero are located. The image is cropped to the smallest rectangle that
    contains all of them and resized (bilinear) back to
    ``IMAGE_SIZE`` x ``IMAGE_SIZE``. An image with no such row or column
    (e.g. an all-zero image) is passed through unchanged.

    Compared with the previous version, the crop now includes the last
    non-empty row and column (the old slice dropped them), and a blank image
    no longer reuses the bounds found for the previous image in the batch.

    Args:
        image: Batch of images indexed along the first axis (NumPy array or
            eager tensor). Assumed to be (batch, height, width, channels)
            with images already IMAGE_SIZE x IMAGE_SIZE, as produced by the
            generators in this notebook -- TODO confirm for other callers.

    Returns:
        A float32 tensor with the processed images stacked along axis 0.
    """
    batch = np.asarray(image)
    img_list = []

    for img in batch:
        # Collapse any channel axis so the row/column profiles are 1-D.
        planes = img.reshape(img.shape[0], img.shape[1], -1)
        cols = np.flatnonzero(planes.sum(axis=(0, 2)) > 0)
        rows = np.flatnonzero(planes.sum(axis=(1, 2)) > 0)

        if rows.size == 0 or cols.size == 0:
            # Nothing to crop to: keep the image as it is.
            img_list.append(tf.cast(img, tf.float32))
            continue

        # Slice ends are exclusive, hence the +1 to keep the last non-empty
        # row and column inside the region of interest.
        roi = img[rows[0]:rows[-1] + 1, cols[0]:cols[-1] + 1]
        img_list.append(tf.image.resize(roi, (IMAGE_SIZE, IMAGE_SIZE)))

    return tf.stack(img_list)


In [None]:
def background_rm_tf(image,label):
    """tf.data adapter: run `background_rm` eagerly on a batch of images.

    The static shape of the input is re-applied to the result because the
    output of `tf.py_function` carries no shape information. The label is
    returned untouched.
    """
    static_shape = image.shape
    outputs = tf.py_function(background_rm, [image], [tf.float32])
    (cropped,) = outputs
    cropped.set_shape(static_shape)
    return cropped, label


In [None]:
# Pipeline step 1: background removal applied to the rescaled batches.
rm_background_train_data = scale_train_data.map(background_rm_tf,num_parallel_calls = AUTOTUNE)
rm_background_valid_data = scale_valid_data.map(background_rm_tf,num_parallel_calls = AUTOTUNE)

## REMOVE NOISE

In [None]:
def denoise(image):
    """Total-variation (Chambolle) denoising with weight 0.01.

    The input is converted with `img_as_float` and handed to
    `denoise_tv_chambolle` in a single call.

    NOTE(review): callers in this notebook pass whole batches, so the batch
    is presumably treated as one n-dimensional array and smoothed across
    the batch axis as well -- confirm whether per-image denoising was
    intended.
    """
    as_float = img_as_float(image)
    return denoise_tv_chambolle(as_float, weight=0.01)


In [None]:
def denoise_tf(image,label):
    """tf.data adapter: run `denoise` eagerly on a batch of images.

    Re-applies the input's static shape to the float32 result; the label is
    returned untouched.
    """
    static_shape = image.shape
    outputs = tf.py_function(denoise, [image], [tf.float32])
    (denoised,) = outputs
    denoised.set_shape(static_shape)
    return denoised, label


In [None]:
# Pipeline step 2: denoising applied after background removal.
rm_noise_train_data = rm_background_train_data.map(denoise_tf,num_parallel_calls = AUTOTUNE)
rm_noise_valid_data = rm_background_valid_data.map(denoise_tf,num_parallel_calls = AUTOTUNE)

## REMOVE THE DIAPHRAGM REGION

In [None]:
def _blank_brightest_region(img):
    """Zero, in place, the largest connected near-maximum region of one image."""
    darkest, brightest = img.min(), img.max()
    # Pixels in the top 6% of the image's intensity range are candidates.
    threshold = darkest + 0.94 * (brightest - darkest)
    candidates = dilation(closing(img > threshold))

    regions = label(candidates)
    region_sizes = np.bincount(regions.ravel())[1:]  # drop background (label 0)
    if region_sizes.size == 0:
        # Constant image: no pixel exceeds the threshold, nothing to remove.
        return img

    img[regions == region_sizes.argmax() + 1] = 0
    return img


def rmDiapgRegion(image):
    """Blank the largest high-intensity region (diaphragm area) of each image.

    A pixel is a candidate when it lies in the top 6% of its image's
    intensity range. The candidate mask is closed and dilated, split into
    connected components, and the largest component is set to 0.

    A 4-D input is processed image by image along its first axis. The
    previous version treated the whole batch as one volume, so the
    threshold and the connected components were shared by all images and
    only a single component in the entire batch was blanked. Inputs of any
    other rank are processed as a single image, as before. A constant image
    is now returned unchanged instead of raising ValueError.

    Args:
        image: One image or a 4-D batch of images (array or eager tensor).

    Returns:
        A float NumPy array of the same shape; the input is not modified.
    """
    img = np.copy(img_as_float(image))

    if img.ndim == 4:
        # Iterating yields views into `img`, so the in-place edits stick.
        for single in img:
            _blank_brightest_region(single)
    else:
        _blank_brightest_region(img)

    return img


In [None]:
def rmDiapgRegion_tf(image,label):
    """tf.data adapter: run `rmDiapgRegion` eagerly on a batch of images.

    Re-applies the input's static shape to the float32 result; the label is
    returned untouched.
    """
    static_shape = image.shape
    outputs = tf.py_function(rmDiapgRegion, [image], [tf.float32])
    (cleaned,) = outputs
    cleaned.set_shape(static_shape)
    return cleaned, label


In [None]:
# Pipeline step 3: diaphragm-region removal applied after denoising.
rm_diapg_train_data = rm_noise_train_data.map(rmDiapgRegion_tf,num_parallel_calls =AUTOTUNE)
rm_diapg_valid_data = rm_noise_valid_data.map(rmDiapgRegion_tf,num_parallel_calls =AUTOTUNE)

In [None]:
# Display the dataset object as the cell output.
rm_diapg_train_data

In [None]:
# for image,l in rm_diapg_train_data.take(1):
#     plt.imshow(image[3],cmap = 'bone')

## CONTRAST ENRICHMENT


In [None]:
def contrast_enrich(image):
    """Contrast enhancement via adaptive histogram equalisation (clip limit 0.02).

    The input is converted with `img_as_float` and handed to
    `exposure.equalize_adapthist` in a single call.

    NOTE(review): callers in this notebook pass whole batches, so the batch
    is presumably equalised as one n-dimensional array rather than image by
    image -- confirm whether that is intended.
    """
    as_float = img_as_float(image)
    return exposure.equalize_adapthist(as_float, clip_limit=0.02)
        

In [None]:
def contrast_enrich_tf(image,label):
    """tf.data adapter: run `contrast_enrich` eagerly on a batch of images.

    Re-applies the input's static shape to the float32 result; the label is
    returned untouched.
    """
    static_shape = image.shape
    outputs = tf.py_function(contrast_enrich, [image], [tf.float32])
    (enhanced,) = outputs
    enhanced.set_shape(static_shape)
    return enhanced, label


In [None]:
# Pipeline step 4: contrast enhancement applied after diaphragm removal.
enhance_contrast_train_data = rm_diapg_train_data.map(contrast_enrich_tf, num_parallel_calls = AUTOTUNE)
enhance_contrast_valid_data = rm_diapg_valid_data.map(contrast_enrich_tf, num_parallel_calls = AUTOTUNE)


In [None]:
# input image
for image,l in enhance_contrast_train_data.take(1):
    plt.imshow(image[1],cmap = 'bone')

## LUNG SEGMENTATION

A segmentation-based crop. This step encourages the model to focus on the lungs.

In [None]:
# U-Net style segmentation network. The single-channel input is resized to
# 512x512, passed through four conv/conv/max-pool encoder stages and a
# bottleneck, then through four decoder stages that upsample with a
# transposed convolution and concatenate the matching encoder features.
# NOTE: the statement order is kept exactly as it was; weights are loaded
# from an HDF5 file further down, which presumably matches layers by their
# order -- confirm before reordering anything here.
inputs = Input((IMAGE_SIZE,IMAGE_SIZE,1))
input_resize = tf.keras.layers.Resizing(512,512, name = 'resize_input') (inputs)
# Encoder stage 1 (32 filters)
conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(input_resize)
conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv1)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

# Encoder stage 2 (64 filters)
conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(pool1)
conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv2)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

# Encoder stage 3 (128 filters)
conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool2)
conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv3)
pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

# Encoder stage 4 (256 filters)
conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(pool3)
conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv4)
pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)

# Bottleneck (512 filters)
conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(pool4)
conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(conv5)

# Decoder stage 1: upsample and merge with encoder stage 4
up6 = concatenate([Conv2DTranspose(256, (2, 2), strides=(2, 2), padding='same')(conv5), conv4], axis=3)
conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(up6)
conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv6)

# Decoder stage 2: upsample and merge with encoder stage 3
up7 = concatenate([Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(conv6), conv3], axis=3)
conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(up7)
conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv7)

# Decoder stage 3: upsample and merge with encoder stage 2
up8 = concatenate([Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(conv7), conv2], axis=3)
conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(up8)
conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv8)

# Decoder stage 4: upsample and merge with encoder stage 1
up9 = concatenate([Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(conv8), conv1], axis=3)
conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(up9)
conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv9)

# One sigmoid output channel per pixel.
conv10 = Conv2D(1, (1, 1), activation='sigmoid')(conv9)

model = tf.keras.Model(inputs=[inputs], outputs=[conv10])

In [None]:
# Load the saved lung-segmentation weights into the U-Net defined above.
model.load_weights("/kaggle/input/unet-lung-segmentation-weights-for-chest-x-rays/cxr_reg_weights.best.hdf5")

In [None]:
def lung_segment(image):
    """Crop every image of a batch horizontally around the segmented lungs.

    The segmentation `model` predicts a 512x512 mask per image, thresholded
    at 0.4. The (up to) two largest connected components of the mask are
    kept; the image, resized to 512x512, is cropped to the column range
    they span plus a 20-pixel margin on each side where that margin fits,
    and the crop is resized to ``IMAGE_SIZE`` x ``IMAGE_SIZE`` with "area"
    interpolation.

    Changes from the previous version: a mask with fewer than two
    components no longer raises (one component is used on its own; an
    empty mask keeps the full image width), and the right-hand margin is
    checked against the image width instead of its height.

    Args:
        image: Batch of images accepted by `model.predict`, indexed along
            the first axis.

    Returns:
        A tensor with the cropped, resized images stacked along axis 0.
    """
    lung_masks = (model.predict(image, verbose=0) > 0.4).astype(np.int32)
    upscale = tf.keras.layers.Resizing(512, 512)
    margin = 20
    img_list = []

    for i in range(image.shape[0]):
        img = upscale(image[i])
        width = img.shape[1]

        components = label(lung_masks[i])
        sizes = np.bincount(components.ravel())[1:]  # drop background (label 0)

        if sizes.size == 0:
            # Nothing segmented: keep the full width rather than crash.
            min_col, max_col = 0, width
        else:
            # Labels of the two largest components (just one if only one exists).
            largest = np.argsort(sizes)[-2:] + 1
            keep = np.squeeze(np.isin(components, largest))
            cols = np.flatnonzero(keep.any(axis=0))
            min_col, max_col = int(cols[0]), int(cols[-1]) + 1

            # As before, the margin is added only when the full margin fits.
            if min_col - margin >= 0:
                min_col -= margin
            if max_col + margin <= width:
                max_col += margin

        crop = img[:, min_col:max_col]
        # The crop width varies per image, so this layer is built per call.
        resized = tf.keras.layers.Resizing(
            IMAGE_SIZE, IMAGE_SIZE, interpolation="area")(crop)
        img_list.append(resized)

    return tf.stack(img_list)

In [None]:
def lung_segment_tf(image,label):
    """tf.data adapter: run `lung_segment` eagerly on a batch of images.

    Re-applies the input's static shape to the float32 result; the label is
    returned untouched.
    """
    static_shape = image.shape
    outputs = tf.py_function(lung_segment, [image], [tf.float32])
    (cropped,) = outputs
    cropped.set_shape(static_shape)
    return cropped, label


In [None]:
# Pipeline step 5: segmentation-based cropping applied after contrast enhancement.
lung_segment_train_data = enhance_contrast_train_data.map(lung_segment_tf, num_parallel_calls = AUTOTUNE)
lung_segment_valid_data = enhance_contrast_valid_data.map(lung_segment_tf, num_parallel_calls = AUTOTUNE)


In [None]:
def plot_lung_segment_img(image_sample):
    """Plot one image next to its segmentation-based crop.

    The sample is turned into a batch of one, divided by 255,
    contrast-enhanced and passed through `lung_segment`; the original and
    the cropped result are shown side by side.
    """
    batch = tf.expand_dims(image_sample, 0) / 255.
    enhanced = contrast_enrich(batch)
    cropped = lung_segment(enhanced)[0]  # lung_segment returns a batch

    fig, (ax_original, ax_cropped) = plt.subplots(1, 2, figsize=(15, 15))
    ax_original.imshow(image_sample, cmap='bone')
    ax_original.set_title("Original Image")

    ax_cropped.imshow(cropped, cmap='bone')
    ax_cropped.set_title("Segmentation-based cropped")


In [None]:
# Visualise the crop for the ninth image of the sampled batch.
plot_lung_segment_img(image_samples[8])

# MODEL

In [None]:
# Model inputs: the rescaled grayscale batches replicated to three channels,
# as expected by the ImageNet backbones below.
#
# No .cache() here: the Keras iterators behind these datasets loop over the
# data indefinitely, so a cache could never be completed and reused. It
# would only keep accumulating every batch it has seen in memory.
train_data = scale_train_data.map(
    lambda x, y: (tf.image.grayscale_to_rgb(x), y)
).prefetch(buffer_size=AUTOTUNE)
valid_data = scale_valid_data.map(
    lambda x, y: (tf.image.grayscale_to_rgb(x), y)
).prefetch(buffer_size=AUTOTUNE)

In [None]:
# Draw one batch of up to 1024 validation images to use as the test set.
# A plain ImageDataGenerator (no random flip) and shuffle=False are used so
# that the test images and their order are the same on every run; the
# augmenting `core_idg` would flip images at random.
test_X, test_Y = next(ImageDataGenerator().flow_from_dataframe(
    dataframe=valid_df,
    directory=None,
    x_col='path',
    y_col=all_labels,
    class_mode='raw',
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    color_mode='grayscale',
    batch_size=1024,
    shuffle=False,
))


In [None]:
# "Normal" test inputs: only rescaled by 1/255 and expanded to three channels.
test_X_normal = tf.keras.layers.Rescaling(1./255) (test_X)
test_X_normal = tf.image.grayscale_to_rgb(tf.convert_to_tensor(test_X_normal))

In [None]:
# "Preprocessed" test inputs: background removal, rescaling, denoising,
# diaphragm-region removal and contrast enhancement, then three channels.
test_X_process = test_X.copy()

test_X_process = background_rm(test_X_process)
test_X_process = tf.keras.layers.Rescaling(1./255) (test_X_process)
test_X_process = denoise(test_X_process)
# Fixed: this step used to read from and assign to `test_X`, so the
# diaphragm removal never reached `test_X_process` and the raw test batch
# was overwritten instead.
test_X_process = rmDiapgRegion(test_X_process)
test_X_process = contrast_enrich(test_X_process)
# lung_segment is deliberately not applied here: the original note says
# using it degrades performance.
test_X_process = tf.image.grayscale_to_rgb(tf.convert_to_tensor(test_X_process))

## DenseNet121

In [None]:
# DenseNet121 backbone (ImageNet weights, global average pooling) followed
# by one sigmoid unit per class. `model_input` replaces the former name
# `input`, which shadowed the Python builtin; the backbone and its output
# tensor also get separate names now.
model_input = tf.keras.layers.Input([IMAGE_SIZE, IMAGE_SIZE, 3], dtype=tf.float32)

dense121_backbone = tf.keras.applications.DenseNet121(
    include_top=False, weights="imagenet", pooling='avg')
dense121_features = dense121_backbone(model_input)

output = Dense(len(all_labels), activation='sigmoid')(dense121_features)

dense121_model = tf.keras.Model(inputs=[model_input], outputs=[output])
dense121_model.summary()

In [None]:
# Multi-label training setup: binary cross-entropy on the sigmoid outputs,
# Adam with learning rate 1e-4, tracking binary accuracy and AUC.
dense121_model.compile(optimizer = tf.keras.optimizers.experimental.Adam(0.0001), loss = 'binary_crossentropy',
                           metrics = ['binary_accuracy', tf.keras.metrics.AUC()])

In [None]:
# Number of full batches in one pass over the training split.
steps_per_epoch = train_df.shape[0]  // BATCH_SIZE
steps_per_epoch

In [None]:
# Load previously saved DenseNet121 weights (presumably trained on the
# un-preprocessed inputs, going by the file name -- confirm).
dense121_model.load_weights("/kaggle/input/vgg19h5/dense121_normal.h5")

In [None]:
# Train for one epoch on the rescaled RGB batches; validation is disabled.
# NOTE(review): steps_per_epoch is hard-coded to 750 here instead of using
# the `steps_per_epoch` value computed earlier -- confirm this is intended.
dense121_model.fit(train_data, 
            steps_per_epoch = 750,
#           validation_data = valid_data, 
            epochs = 1, 
            verbose = 1,
          )

In [None]:
# dense121_model.save_weights("dense121_normal_.h5")

### NORMAL

In [None]:
# Predict class probabilities for the un-preprocessed test inputs and
# preview the first five rows.
y_pred_dense121_normal = dense121_model.predict(test_X_normal)
y_pred_dense121_normal[:5]


In [None]:
from sklearn.metrics import roc_curve, auc,f1_score
# One ROC curve per class for DenseNet121 on the un-preprocessed test
# inputs, plus the mean AUC over all classes.
fig, c_ax = plt.subplots(1, 1, figsize=(9, 9))
mean_auc_dense121_normal = 0
for idx, c_label in enumerate(all_labels):
    fpr, tpr, thresholds = roc_curve(test_Y[:, idx].astype(int), y_pred_dense121_normal[:, idx])
    class_auc = auc(fpr, tpr)  # computed once, used for the legend and the mean
    c_ax.plot(fpr, tpr, label='%s (AUC:%0.2f)' % (c_label, class_auc))
    mean_auc_dense121_normal += class_auc

# Average over the actual number of classes rather than a hard-coded 14.
mean_auc_dense121_normal /= len(all_labels)
print(mean_auc_dense121_normal)
c_ax.legend()
c_ax.set_xlabel('False Positive Rate')
c_ax.set_ylabel('True Positive Rate')


### PREPROCESS

In [None]:
# Switch to the DenseNet121 weights used for the preprocessed inputs
# (presumably trained on the preprocessed pipeline -- confirm).
dense121_model.load_weights("/kaggle/input/vgg19h5/dense121_3.h5")

In [None]:
# Predict class probabilities for the preprocessed test inputs and preview
# the first five rows.
y_pred_dense121_preprocess = dense121_model.predict(test_X_process)
y_pred_dense121_preprocess[:5]


In [None]:
from sklearn.metrics import roc_curve, auc,f1_score
# One ROC curve per class for DenseNet121 on the preprocessed test inputs,
# plus the mean AUC over all classes.
fig, c_ax = plt.subplots(1, 1, figsize=(9, 9))
mean_auc_dense121_preprocess = 0
for idx, c_label in enumerate(all_labels):
    fpr, tpr, thresholds = roc_curve(test_Y[:, idx].astype(int), y_pred_dense121_preprocess[:, idx])
    class_auc = auc(fpr, tpr)  # computed once, used for the legend and the mean
    c_ax.plot(fpr, tpr, label='%s (AUC:%0.2f)' % (c_label, class_auc))
    mean_auc_dense121_preprocess += class_auc

# Average over the actual number of classes rather than a hard-coded 14.
mean_auc_dense121_preprocess /= len(all_labels)
print(mean_auc_dense121_preprocess)
c_ax.legend()
c_ax.set_xlabel('False Positive Rate')
c_ax.set_ylabel('True Positive Rate')


## VGG19

In [None]:
# VGG19 backbone (ImageNet weights) -> batch norm -> global average pooling
# -> 256-unit ReLU layer -> one sigmoid unit per class.
#
# The earlier version also built `vgg19.preprocess_input(input)` but never
# connected the result to the network; that dead tensor is dropped so the
# code shows what the model actually consumes (the raw `model_input`).
# `model_input` also replaces the name `input`, which shadowed the builtin.
model_input = tf.keras.layers.Input([IMAGE_SIZE, IMAGE_SIZE, 3], dtype=tf.float32)

vgg19_backbone = VGG19(include_top=False, weights='imagenet')
vgg19_features = vgg19_backbone(model_input)

normal = BatchNormalization()(vgg19_features)
pooled = GlobalAveragePooling2D()(normal)

dense1 = Dense(256, activation='relu')(pooled)

output = Dense(len(all_labels), activation='sigmoid')(dense1)

vgg19_model = tf.keras.Model(inputs=[model_input], outputs=[output])
vgg19_model.summary()

In [None]:
# vgg19_model.load_weights("/kaggle/input/vgg19h5/vgg19_process_lung_2.h5")

In [None]:
# Multi-label training setup: binary cross-entropy on the sigmoid outputs,
# Adam with learning rate 1e-4, tracking binary accuracy and AUC.
vgg19_model.compile(optimizer = tf.keras.optimizers.experimental.Adam(0.0001), loss = 'binary_crossentropy',
                           metrics = ['binary_accuracy', tf.keras.metrics.AUC()])

In [None]:
# Number of full batches in one pass over the training split.
steps_per_epoch = train_df.shape[0]  // BATCH_SIZE
steps_per_epoch

In [None]:
# vgg19_model.fit(train_data, 
#             steps_per_epoch = steps_per_epoch,
# #           validation_data = valid_data, 
#             epochs = 1, 
#             verbose = 1,
#           )

### NORMAL

In [None]:
# Load the saved VGG19 weights used for the un-preprocessed inputs.
vgg19_model.load_weights("/kaggle/input/weights/vgg19_no2.h5")

In [None]:
# Predict class probabilities for the un-preprocessed test inputs and
# preview the first five rows.
y_pred_vgg19_normal = vgg19_model.predict(test_X_normal)
y_pred_vgg19_normal[:5]

In [None]:
from sklearn.metrics import roc_curve, auc
# One ROC curve per class for VGG19 on the un-preprocessed test inputs,
# plus the mean AUC over all classes.
fig, c_ax = plt.subplots(1, 1, figsize=(9, 9))
mean_auc_vgg19_normal = 0
for idx, c_label in enumerate(all_labels):
    fpr, tpr, thresholds = roc_curve(test_Y[:, idx].astype(int), y_pred_vgg19_normal[:, idx])
    class_auc = auc(fpr, tpr)  # computed once, used for the legend and the mean
    c_ax.plot(fpr, tpr, label='%s (AUC:%0.2f)' % (c_label, class_auc))
    mean_auc_vgg19_normal += class_auc
# Average over the actual number of classes rather than a hard-coded 14.
mean_auc_vgg19_normal /= len(all_labels)
print(mean_auc_vgg19_normal)
c_ax.legend()
c_ax.set_xlabel('False Positive Rate')
c_ax.set_ylabel('True Positive Rate')


### PREPROCESS

In [None]:
# Switch to the VGG19 weights used for the preprocessed inputs.
vgg19_model.load_weights("/kaggle/input/vgg19h5/vgg19.h5")

In [None]:
# Predict class probabilities for the preprocessed test inputs and preview
# the first five rows.
y_pred_vgg19_process = vgg19_model.predict(test_X_process)
y_pred_vgg19_process[:5]

In [None]:
from sklearn.metrics import roc_curve, auc,f1_score
# One ROC curve per class for VGG19 on the preprocessed test inputs, plus
# the mean AUC over all classes.
fig, c_ax = plt.subplots(1, 1, figsize=(9, 9))
mean_auc_vgg19_process = 0
for idx, c_label in enumerate(all_labels):
    fpr, tpr, thresholds = roc_curve(test_Y[:, idx].astype(int), y_pred_vgg19_process[:, idx])
    class_auc = auc(fpr, tpr)  # computed once, used for the legend and the mean
    c_ax.plot(fpr, tpr, label='%s (AUC:%0.2f)' % (c_label, class_auc))
    mean_auc_vgg19_process += class_auc

# Average over the actual number of classes rather than a hard-coded 14.
mean_auc_vgg19_process /= len(all_labels)
print(mean_auc_vgg19_process)
c_ax.legend()
c_ax.set_xlabel('False Positive Rate')
c_ax.set_ylabel('True Positive Rate')


## EfficientNetV2S 

In [None]:
# EfficientNetV2S backbone (ImageNet weights) -> batch norm -> global
# average pooling -> 256-unit ReLU layer -> one sigmoid unit per class.
# `model_input` replaces the former name `input`, which shadowed the Python
# builtin; the backbone and its output tensor also get separate names.
model_input = tf.keras.layers.Input([IMAGE_SIZE, IMAGE_SIZE, 3], dtype=tf.float32)

eff_backbone = tf.keras.applications.EfficientNetV2S(include_top=False, weights='imagenet')
eff_features = eff_backbone(model_input)

normal = BatchNormalization()(eff_features)
pooled = GlobalAveragePooling2D()(normal)

dense1 = Dense(256, activation='relu')(pooled)

output = Dense(len(all_labels), activation='sigmoid')(dense1)

eff_model = tf.keras.Model(inputs=[model_input], outputs=[output])
eff_model.summary()

In [None]:
# Multi-label training setup: binary cross-entropy on the sigmoid outputs,
# AdamW with learning rate 1e-4, tracking binary accuracy and AUC.
eff_model.compile(optimizer = tf.keras.optimizers.experimental.AdamW(0.0001), loss = 'binary_crossentropy',
                           metrics = ['binary_accuracy', tf.keras.metrics.AUC()])


In [None]:
# Number of full batches in one pass over the training split.
steps_per_epoch = train_df.shape[0]  // BATCH_SIZE
steps_per_epoch


In [None]:
# eff_model.fit(train_data, 
#             steps_per_epoch = steps_per_epoch,
# #           validation_data = valid_data, 
#             epochs = 1, 
#             verbose = 1,
#           )


### NORMAL

In [None]:
# Load the saved EfficientNetV2S weights used for the un-preprocessed inputs.
eff_model.load_weights("/kaggle/input/weights/effv2s.h5")

In [None]:
# Predict class probabilities for the un-preprocessed test inputs and
# preview the first five rows.
y_pred_eff_normal = eff_model.predict(test_X_normal)
y_pred_eff_normal[:5]


In [None]:
from sklearn.metrics import roc_curve, auc, f1_score
# One ROC curve per class for EfficientNetV2S on the un-preprocessed test
# inputs, plus the mean AUC and the mean per-class F1 score.
fig, c_ax = plt.subplots(1, 1, figsize=(9, 9))
mean_auc_eff_normal = 0
f1_eff_normal = 0
for idx, c_label in enumerate(all_labels):
    y_true = test_Y[:, idx].astype(int)
    y_score = y_pred_eff_normal[:, idx]
    fpr, tpr, thresholds = roc_curve(y_true, y_score)
    # Fixed: this used to be `f1_eff_normal += f1_score`, which adds the
    # function object itself to a number and raises TypeError. The F1
    # score is now computed on predictions binarised at 0.5 (the threshold
    # is an assumption -- adjust if another operating point is wanted).
    f1_eff_normal += f1_score(y_true, y_score > 0.5, zero_division=0)
    class_auc = auc(fpr, tpr)  # computed once, used for the legend and the mean
    c_ax.plot(fpr, tpr, label='%s (AUC:%0.2f)' % (c_label, class_auc))
    mean_auc_eff_normal += class_auc

# Average over the actual number of classes rather than a hard-coded 14.
mean_auc_eff_normal /= len(all_labels)
f1_eff_normal /= len(all_labels)

print(mean_auc_eff_normal)
c_ax.legend()
c_ax.set_xlabel('False Positive Rate')
c_ax.set_ylabel('True Positive Rate')

### PREPROCESS

In [None]:
# Switch to the EfficientNetV2S weights used for the preprocessed inputs.
eff_model.load_weights("/kaggle/input/vgg19h5/effv2s_1.h5")

In [None]:
# Predict class probabilities for the preprocessed test inputs and preview
# the first five rows.
y_pred_eff_process = eff_model.predict(test_X_process)
y_pred_eff_process[:5]


In [None]:
from sklearn.metrics import roc_curve, auc
# One ROC curve per class for EfficientNetV2S on the preprocessed test
# inputs, plus the mean AUC over all classes.
fig, c_ax = plt.subplots(1, 1, figsize=(9, 9))
mean_auc_eff_process = 0
for idx, c_label in enumerate(all_labels):
    fpr, tpr, thresholds = roc_curve(test_Y[:, idx].astype(int), y_pred_eff_process[:, idx])
    class_auc = auc(fpr, tpr)  # computed once, used for the legend and the mean
    c_ax.plot(fpr, tpr, label='%s (AUC:%0.2f)' % (c_label, class_auc))
    mean_auc_eff_process += class_auc

# Average over the actual number of classes rather than a hard-coded 14.
mean_auc_eff_process /= len(all_labels)
print(mean_auc_eff_process)
c_ax.legend()
c_ax.set_xlabel('False Positive Rate')
c_ax.set_ylabel('True Positive Rate')

## ConvNeXt

In [None]:
import tensorflow_hub as hub

In [None]:
# TF-Hub handle of the ConvNeXt feature extractor loaded below.
MODEL_PATH = "https://tfhub.dev/sayakpaul/convnext_base_21k_1k_224_fe/1"

In [None]:
# ConvNeXt feature extractor from TF-Hub (fine-tuned, trainable=True) on
# inputs resized to 224x224, followed by three GELU dense layers and one
# sigmoid unit per class. `model_input` replaces the former name `input`,
# which shadowed the Python builtin, and the hub features no longer share
# the name `conv_model` with the final model. Layer names are unchanged.
model_input = tf.keras.layers.Input([IMAGE_SIZE, IMAGE_SIZE, 3], dtype=tf.float32, name='input')
input_conv = tf.keras.layers.Resizing(224, 224, name='resize_input')(model_input)

hub_layer = hub.KerasLayer(MODEL_PATH, trainable=True, name="ConvNet")
conv_features = hub_layer(input_conv)

dense1 = Dense(768, activation=tf.keras.activations.gelu, name="dense_768")(conv_features)
dense1 = Dense(512, activation=tf.keras.activations.gelu, name="dense_512")(dense1)
dense1 = Dense(128, activation=tf.keras.activations.gelu, name="dense_128")(dense1)
output = Dense(len(all_labels), activation='sigmoid', name='output')(dense1)

conv_model = tf.keras.Model(inputs=[model_input], outputs=[output])
conv_model.summary()

In [None]:
# Multi-label training setup: binary cross-entropy on the sigmoid outputs,
# AdamW with learning rate 3e-5, tracking binary accuracy and AUC.
conv_model.compile(optimizer = tf.keras.optimizers.experimental.AdamW(3e-5), loss = 'binary_crossentropy',
                           metrics = ['binary_accuracy', tf.keras.metrics.AUC()])


In [None]:
# Number of full batches in one pass over the training split.
steps_per_epoch = train_df.shape[0]  // BATCH_SIZE
steps_per_epoch


In [None]:
# conv_model.fit(train_data, 
#             steps_per_epoch = steps_per_epoch,
# #           validation_data = valid_data, 
#             epochs = 1, 
#             verbose = 1,
#           )

### NORMAL

In [None]:
# Load the saved checkpoint used for the un-preprocessed inputs.
conv_model.load_weights('/kaggle/input/weights/convnet/my_checkpoint')

In [None]:
# Predict class probabilities for the un-preprocessed test inputs and
# preview the first five rows.
y_pred_conv_normal = conv_model.predict(test_X_normal)
y_pred_conv_normal[:5]


In [None]:
from sklearn.metrics import roc_curve, auc
# One ROC curve per class for the ConvNeXt model on the un-preprocessed
# test inputs, plus the mean AUC over all classes.
fig, c_ax = plt.subplots(1, 1, figsize=(9, 9))
mean_auc_conv_normal = 0
for idx, c_label in enumerate(all_labels):
    fpr, tpr, thresholds = roc_curve(test_Y[:, idx].astype(int), y_pred_conv_normal[:, idx])
    class_auc = auc(fpr, tpr)  # computed once, used for the legend and the mean
    c_ax.plot(fpr, tpr, label='%s (AUC:%0.2f)' % (c_label, class_auc))
    mean_auc_conv_normal += class_auc

# Average over the actual number of classes rather than a hard-coded 14.
mean_auc_conv_normal /= len(all_labels)
print(mean_auc_conv_normal)
c_ax.legend()
c_ax.set_xlabel('False Positive Rate')
c_ax.set_ylabel('True Positive Rate')

### PREPROCESS

In [None]:
# Switch to the checkpoint used for the preprocessed inputs.
conv_model.load_weights('/kaggle/input/vgg19h5/conv-checkpoint/my_checkpoint')


In [None]:
# Predict class probabilities for the preprocessed test inputs and preview
# the first five rows.
y_pred_conv_process = conv_model.predict(test_X_process)
y_pred_conv_process[:5]


In [None]:
from sklearn.metrics import roc_curve, auc
# One ROC curve per class for the ConvNeXt model on the preprocessed test
# inputs, plus the mean AUC over all classes.
fig, c_ax = plt.subplots(1, 1, figsize=(9, 9))
mean_auc_conv_process = 0
for idx, c_label in enumerate(all_labels):
    fpr, tpr, thresholds = roc_curve(test_Y[:, idx].astype(int), y_pred_conv_process[:, idx])
    class_auc = auc(fpr, tpr)  # computed once, used for the legend and the mean
    c_ax.plot(fpr, tpr, label='%s (AUC:%0.2f)' % (c_label, class_auc))
    mean_auc_conv_process += class_auc

# Average over the actual number of classes rather than a hard-coded 14.
mean_auc_conv_process /= len(all_labels)
print(mean_auc_conv_process)
c_ax.legend()
c_ax.set_xlabel('False Positive Rate')
c_ax.set_ylabel('True Positive Rate')
c_ax.set_title("AUC for each class")