# **Imports**

In [None]:
import pandas as pd
import numpy as np
import scipy
from scipy import ndimage
import cv2
import os
from google.colab import drive
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import torch
import torchvision
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
from PIL import Image, ImageDraw
import sklearn
from sklearn.metrics import roc_auc_score, roc_curve, auc
from tqdm import tqdm
import keras
import importlib
from keras.layers import Input
from keras.layers.core import Dense
from keras.models import Model
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPool2D, Dense, Flatten, GlobalAveragePooling2D, Dropout, BatchNormalization
from keras.preprocessing import image
from keras import backend as K
drive.mount('/content/drive')

Instructions for updating:
non-resource variables are not supported in the long term


Using TensorFlow backend.


Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [None]:
class ModelFactory:
    """
    Model factory for Keras default models.

    Holds a small registry (``self.models_``) that maps a Keras application
    class name to its default input shape, the ``keras.applications``
    submodule that defines it, and the name recorded for its last
    convolutional layer. ``get_model`` uses that registry to build a backbone
    with a sigmoid ``Dense`` head on top.
    """

    def __init__(self):
        # Registry keyed by the class name inside keras.applications.<module_name>.
        self.models_ = dict(
            VGG16=dict(
                input_shape=(224, 224, 3),
                module_name="vgg16",
                last_conv_layer="block5_conv3",
            ),
            VGG19=dict(
                input_shape=(224, 224, 3),
                module_name="vgg19",
                last_conv_layer="block5_conv4",
            ),
            DenseNet121=dict(
                input_shape=(224, 224, 3),
                module_name="densenet",
                last_conv_layer="bn",
            ),
            ResNet50=dict(
                input_shape=(224, 224, 3),
                module_name="resnet50",
                last_conv_layer="activation_49",
            ),
            InceptionV3=dict(
                input_shape=(299, 299, 3),
                module_name="inception_v3",
                last_conv_layer="mixed10",
            ),
            InceptionResNetV2=dict(
                input_shape=(299, 299, 3),
                module_name="inception_resnet_v2",
                last_conv_layer="conv_7b_ac",
            ),
            NASNetMobile=dict(
                input_shape=(224, 224, 3),
                module_name="nasnet",
                last_conv_layer="activation_188",
            ),
            NASNetLarge=dict(
                input_shape=(331, 331, 3),
                module_name="nasnet",
                last_conv_layer="activation_260",
            ),
        )

    def _get_spec(self, model_name):
        """Return the registry entry for ``model_name``.

        Raises:
            KeyError: if ``model_name`` is not registered. The message lists
                the supported names so a typo is easy to spot.
        """
        try:
            return self.models_[model_name]
        except KeyError:
            supported = ", ".join(sorted(self.models_))
            raise KeyError(
                f"Unknown model_name {model_name!r}; supported models: {supported}"
            ) from None

    def get_last_conv_layer(self, model_name):
        """Return the last-conv-layer name registered for ``model_name``.

        Raises:
            KeyError: if ``model_name`` is not registered.
        """
        return self._get_spec(model_name)["last_conv_layer"]

    def get_input_size(self, model_name):
        """Return the first two entries of the registered input shape.

        Raises:
            KeyError: if ``model_name`` is not registered.
        """
        return self._get_spec(model_name)["input_shape"][:2]

    def get_model(self, class_names, model_name="DenseNet121", use_base_weights=True,
                  weights_path=None, input_shape=None):
        """Build a backbone from ``keras.applications`` with a sigmoid head.

        Args:
            class_names: sized collection of class labels; only its length is
                used, as the number of units of the final ``Dense`` layer.
            model_name: key of the registry (see ``__init__``).
            use_base_weights: only the literal ``True`` selects
                ``weights="imagenet"`` for the backbone; any other value
                builds the backbone with ``weights=None``.
            weights_path: optional path passed to ``model.load_weights`` after
                the full model is assembled. ``None`` and ``""`` both mean
                "do not load".
            input_shape: optional override of the registered input shape.

        Returns:
            A Keras ``Model`` mapping the image input to a
            ``len(class_names)``-unit sigmoid layer named ``"predictions"``.

        Raises:
            KeyError: if ``model_name`` is not registered.
        """
        spec = self._get_spec(model_name)

        if use_base_weights is True:
            base_weights = "imagenet"
        else:
            base_weights = None

        # Resolve e.g. keras.applications.densenet.DenseNet121 dynamically so
        # only the requested architecture's module is imported.
        base_model_class = getattr(
            importlib.import_module(
                f"keras.applications.{spec['module_name']}"
            ),
            model_name)

        if input_shape is None:
            input_shape = spec["input_shape"]

        img_input = Input(shape=input_shape)

        base_model = base_model_class(
            include_top=False,
            input_tensor=img_input,
            input_shape=input_shape,
            weights=base_weights,
            pooling="avg")
        x = base_model.output
        predictions = Dense(len(class_names), activation="sigmoid", name="predictions")(x)
        model = Model(inputs=img_input, outputs=predictions)

        # Treat an empty string the same as "no weights file supplied".
        if weights_path == "":
            weights_path = None

        if weights_path is not None:
            print(f"load model weights_path: {weights_path}")
            model.load_weights(weights_path)
        return model

In [None]:
def get_model(pretrained_weights_path,
              base_model_output_layer=-2,
              num_classes=14,
              print_summary=True):
  """Build a feature extractor by truncating the default ModelFactory model.

  The default ModelFactory model is built with a ``num_classes``-unit head,
  loaded from ``pretrained_weights_path``, and then cut so that the returned
  model outputs the activations of ``base_model.layers[base_model_output_layer]``.

  Args:
    pretrained_weights_path: weights file forwarded to
      ``ModelFactory.get_model(weights_path=...)``.
    base_model_output_layer: index into ``base_model.layers`` of the layer
      whose output becomes the new model output (default ``-2``, i.e. the
      layer right before the final one).
    num_classes: number of units of the classification head the weights file
      was saved with; the head must match for the weights to load
      (default 14, the value previously hard-coded).
    print_summary: when true (default, matching the previous behaviour) the
      truncated model's ``summary()`` is printed.

  Returns:
    A ``keras.models.Model`` sharing the base model's inputs.
  """
  base_model_class_names = [str(i) for i in range(num_classes)]
  base_model = ModelFactory().get_model(class_names=base_model_class_names,
                                        weights_path=pretrained_weights_path)
  features = base_model.layers[base_model_output_layer].output
  model = keras.models.Model(inputs=base_model.inputs, outputs=features)
  if print_summary:
    model.summary()
  return model

In [None]:
# Truncated backbone used for feature extraction, loaded from the weights file on Drive.
model = get_model(
    pretrained_weights_path='/content/drive/My Drive/data_Yonathan/chexnet_weights.h5',
)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Downloading data from https://github.com/keras-team/keras-applications/releases/download/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
load model weights_path: /content/drive/My Drive/data_Yonathan/chexnet_weights.h5
Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
zero_padding2d_1 (ZeroPadding2D (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1/conv (Conv2D)             (None, 112, 112, 64) 9408        zero_pa

In [None]:
# Root folder on the mounted Drive; every other path below hangs off it.
data_dir = '/content/drive/My Drive/data_Yonathan/'
# Train split: annotation CSV and image folder.
train_csv_dir, train_data_dir = f'{data_dir}train.csv', f'{data_dir}train/'
# Validation ("dev") split: annotation CSV and image folder.
validation_csv_dir, validation_data_dir = f'{data_dir}dev.csv', f'{data_dir}dev/'

def get_df(csv_dir):
  """Load an annotation CSV and attach a textual ``label`` column.

  Rows whose ``annotation`` cell is null are labelled ``'without objects'``;
  all other rows are labelled ``'with objects'``.

  Args:
    csv_dir: path (or file-like object) handed to ``pd.read_csv``; the file
      must contain an ``annotation`` column.

  Returns:
    The loaded DataFrame with the extra ``label`` column.
  """
  frame = pd.read_csv(csv_dir)
  annotation_missing = frame['annotation'].isnull()
  label_for = {True: 'without objects', False: 'with objects'}
  frame['label'] = annotation_missing.map(label_for)
  return frame

# Load both annotation tables (train first, then validation) with their label column.
train_csv, validation_csv = map(get_df, (train_csv_dir, validation_csv_dir))

In [None]:
# model_json = model.to_json()
# with open("simple_backbone_model.json", "w") as json_file:
#     json_file.write(model_json)
# # serialize weights to HDF5
# model.save_weights("simple_backbone_model.h5")
# print("Saved model to disk")

In [None]:
# Backend function that maps [image batch, learning-phase flag] to a dict of
# intermediate activations keyed by layer name.
inp = model.input
# NOTE(review): range(8, 16) selects blocks 8..15; if the backbone also has a
# 'conv5_block16_concat' layer it is not included here -- confirm this is intended.
concat_layers_names = [f'conv5_block{x}_concat' for x in range(8, 16)]
concat_layers = list(map(lambda layer_name: model.get_layer(name=layer_name),
                         concat_layers_names))
concat_layers_outputs = [concat_layer.output for concat_layer in concat_layers]
functor = K.function(
    [inp, K.learning_phase()],
    {layer_name: tensor
     for layer_name, tensor in zip(concat_layers_names, concat_layers_outputs)},
)

In [None]:
def pickle_data(k_index, dict_params, augment_function=None, augment_function_name=None,
                batch_size=1000, learning_phase=1.):
  """Extract intermediate activations for one slice of a dataset and pickle them.

  Takes rows ``[k_index * batch_size, (k_index + 1) * batch_size)`` of
  ``dict_params['csv_df']``. Each image is loaded as 224x224 grayscale, scaled
  by 1/255, replicated to three channels and optionally augmented. It is then
  run through the module-level ``functor``. The image and every activation
  named in ``concat_layers_names`` are stored in per-row cells, and the
  resulting DataFrame is written with ``to_pickle`` under the module-level
  ``data_dir``.

  Args:
    k_index: zero-based index of the slice to process.
    dict_params: dict with keys ``'csv_df'`` (DataFrame with an ``image_name``
      column), ``'data_dir'`` (prefix prepended to each image name) and
      ``'name'`` (prefix of the output file name).
    augment_function: optional callable applied to the (224, 224, 3) image
      array; its result must still be reshapeable to (1, 224, 224, 3).
    augment_function_name: optional tag appended to the output file name.
    batch_size: number of rows per slice (default 1000, the value previously
      hard-coded).
    learning_phase: value fed to ``functor`` as the Keras learning-phase flag.
      NOTE(review): the default ``1.`` is kept for backward compatibility, but
      it runs the network in *training* mode; for plain feature extraction
      ``0.`` (inference mode) is normally what is wanted -- confirm.

  Rows that raise while loading or processing are reported and left with
  ``None`` in the cells that were not yet filled; they do not abort the batch.
  """
  start = k_index * batch_size
  df = dict_params['csv_df'].iloc[start: start + batch_size].copy()
  # After reset_index the row labels are 0..len(df)-1, so they can be used
  # directly with df.at below.
  df = df.reset_index()
  df['image'] = None
  for name in concat_layers_names:
    df[name] = None
  for (row_idx, row) in df.iterrows():
    if row_idx % 100 == 0:
      print(f'On {k_index}k, on {row_idx}/{batch_size}')
    try:
      img_path = dict_params['data_dir'] + row['image_name']
      img = image.load_img(img_path, target_size=(224, 224), color_mode='grayscale')
      img = image.img_to_array(img).squeeze() / 255.
      # Replicate the single gray channel three times to get an RGB-shaped input.
      img = np.stack((img,) * 3, axis=-1)
      if augment_function is not None:
        img = augment_function(img)
      # Use .at instead of the chained df['image'].iloc[...] assignment: the
      # chained form triggers SettingWithCopyWarning and may write to a
      # temporary copy, silently leaving the 'image' column empty.
      df.at[row_idx, 'image'] = img
      img = img.reshape((1, 224, 224, 3))
      res_dict = functor([img, learning_phase])
      for name, value in res_dict.items():
        df.at[row_idx, name] = value
    except Exception as exc:
      # Best effort: keep going, but say why the row failed. Catching
      # Exception (not a bare except) lets Ctrl-C / kernel interrupt through.
      print(f'Image num. {row["image_name"]} failed to load')
      print(f'  reason: {exc!r}')
  if augment_function_name is None:
    df.to_pickle(data_dir + f'{dict_params["name"]}_{k_index}k')
  else:
    df.to_pickle(data_dir + f'{dict_params["name"]}_{k_index}k - {augment_function_name}')
  print(f'Successfully Finished {k_index} batch.')

In [None]:
def pickle_results(augment_function, augment_function_name, train_batches=8):
  """Run ``pickle_data`` over the validation set and the train set.

  The validation set is processed as a single slice (index 0 only), so rows
  beyond the first slice of ``validation_csv`` are not pickled. The train set
  is processed as slices ``0 .. train_batches - 1``.

  Args:
    augment_function: callable forwarded to ``pickle_data`` (or ``None``).
    augment_function_name: tag forwarded to ``pickle_data`` for the output
      file names (or ``None``).
    train_batches: number of train slices to process (default 8, the value
      previously hard-coded).
  """
  validation_params = {'csv_df': validation_csv,
                       'data_dir': validation_data_dir,
                       'name': 'validation'}
  pickle_data(0,
              validation_params,
              augment_function=augment_function,
              augment_function_name=augment_function_name)
  for i in range(train_batches):
    # A fresh dict per call, as before, so pickle_data never sees shared state.
    pickle_data(i,
                {'csv_df': train_csv,
                 'data_dir': train_data_dir,
                 'name': 'train'},
                augment_function=augment_function,
                augment_function_name=augment_function_name)
  print('Done.')

In [None]:
def augment_function(img):
  """Rotate ``img`` by 15 degrees without changing the array shape."""
  return ndimage.rotate(img, 15.0, reshape=False)

# Tag appended to the pickle file names produced by this run.
augment_function_name = 'rot15'
pickle_results(augment_function, augment_function_name)

On 0k, on 0/1000


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


On 0k, on 100/1000
On 0k, on 200/1000
On 0k, on 300/1000
On 0k, on 400/1000
On 0k, on 500/1000
On 0k, on 600/1000
On 0k, on 700/1000
On 0k, on 800/1000
On 0k, on 900/1000
Successfully Finished 0 batch.
On 0k, on 0/1000
On 0k, on 100/1000
On 0k, on 200/1000
On 0k, on 300/1000
On 0k, on 400/1000
On 0k, on 500/1000
On 0k, on 600/1000
On 0k, on 700/1000
On 0k, on 800/1000
On 0k, on 900/1000
Successfully Finished 0 batch.
On 1k, on 0/1000
On 1k, on 100/1000
On 1k, on 200/1000
On 1k, on 300/1000
On 1k, on 400/1000
On 1k, on 500/1000
On 1k, on 600/1000
On 1k, on 700/1000
On 1k, on 800/1000
On 1k, on 900/1000
Successfully Finished 1 batch.
On 2k, on 0/1000
On 2k, on 100/1000
On 2k, on 200/1000
On 2k, on 300/1000
On 2k, on 400/1000
On 2k, on 500/1000
On 2k, on 600/1000
On 2k, on 700/1000
On 2k, on 800/1000
On 2k, on 900/1000
Successfully Finished 2 batch.
On 3k, on 0/1000
On 3k, on 100/1000
Image num. 03124.jpg failed to load
On 3k, on 200/1000
On 3k, on 300/1000
On 3k, on 400/1000
On 3k, on 50