<a href="https://colab.research.google.com/github/zSoftwareRepository/ABAP/blob/master/Create_Model_EfficientNetB0_Base_Model_GC_Version.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Wed Jun 16 22:32:10 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.27       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   42C    P8     9W /  70W |      3MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

Adding text

In [None]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import pickle
import os
import ast

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from skimage import exposure
from random import randrange

In [None]:
# Mount Google Drive at /content/drive/ (Colab-only helper).
# NOTE(review): later cells use relative 'drive/MyDrive/...' paths, which presumably
# assume the working directory is /content — confirm.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
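# NOTE: DataGeneratorDicom and DataGeneratorlut below call pydicom.read_file; uncomment
# these imports (and install the packages) before using either of those generators.
#import pydicom
#from pydicom.pixel_data_handlers.util import apply_voi_lut
#import pylibjpeg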

In [None]:
import tensorflow as tf

# Report how many GPU devices TensorFlow can see and which TensorFlow version is loaded.
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
print(tf.__version__)

Num GPUs Available:  1
2.5.0


In [None]:
from tensorflow.keras.preprocessing.image import array_to_img, img_to_array
from PIL import Image, ImageEnhance, ImageFilter

In [None]:
# Install tensorflow-addons (imported as `tfa` in the next cell). No version is pinned here.
!pip install tensorflow-addons



In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator,load_img, img_to_array
from tensorflow.keras.layers import Layer, Flatten

from tensorflow.keras import backend as K
from tensorflow.keras import initializers, regularizers, constraints, optimizers, layers

import tensorflow_addons as tfa

In [None]:
# Side length in pixels of the square images: the DICOM generators resize to
# (img_size, img_size) and the model builders use it for input_shape.
img_size = 300

In [None]:
# Make a simple linear VOI LUT from the raw (stored) pixel data
def make_lut(storedPixels, windowWidth, windowLevel, p_i):
    """Build a linear window/level lookup table mapping stored pixel values to 0-255.

    Args:
        storedPixels: array of raw stored pixel values; only its minimum and maximum are used.
        windowWidth: width of the intensity window. A width of 0 (for example a constant
            image whose width was derived as max - min) is treated as 1 so the mapping
            never divides by zero.
        windowLevel: centre of the intensity window.
        p_i: photometric interpretation string; "MONOCHROME1" inverts the output values.

    Returns:
        A list of length ``max(storedPixels) + 1`` where ``lut[v]`` is the 8-bit display
        value for stored value ``v``. Entries below the minimum stored value stay 0.
    """
    # Slope and Intercept set to 1 and 0 for X-ray. Get these from DICOM tags instead if using
    # on a modality that requires them (CT, PT etc)
    slope = 1.0
    intercept = 0.0
    minPixel = int(np.amin(storedPixels))
    maxPixel = int(np.amax(storedPixels))

    # A zero-width window would divide by zero below; fall back to the smallest usable width.
    if windowWidth == 0:
        windowWidth = 1

    # One LUT slot per possible stored value, 0..maxPixel.
    # NOTE(review): stored values are used directly as list indices, so this assumes they
    # are non-negative — confirm for the modalities in use.
    lut = [0] * (maxPixel + 1)

    # MONOCHROME1 output is inverted; for every other interpretation the window level is
    # mirrored within the pixel range instead.
    # NOTE(review): the original comment described inverting the level *for* MONOCHROME1,
    # which is the opposite branch from what the code does; the logic is left unchanged — confirm.
    invert = False
    if p_i == "MONOCHROME1":
        invert = True
    else:
        windowLevel = (maxPixel - minPixel) - windowLevel

    # Fill every stored value from minPixel through maxPixel *inclusive*; the brightest
    # stored value must receive a LUT entry too.
    for storedValue in range(minPixel, maxPixel + 1):
        modalityLutValue = storedValue * slope + intercept
        voiLutValue = (((modalityLutValue - windowLevel) / windowWidth + 0.5) * 255.0)
        clampedValue = min(max(voiLutValue, 0), 255)
        if invert:
            lut[storedValue] = round(255 - clampedValue)
        else:
            lut[storedValue] = round(clampedValue)

    return lut

In [None]:
# Apply the LUT to a pixel array
def apply_lut(pixels_in, lut):
    """Map every pixel through ``lut`` and return the results as a flat list of ints.

    ``pixels_in`` is flattened first (it must provide ``.flatten()``), so the returned
    list has one entry per pixel and carries no shape information.
    """
    return [int(lut[value]) for value in pixels_in.flatten()]

In [None]:
def get_filepaths(directory):
    """
    Recursively collect the files under ``directory`` whose name contains '.jpg'.

    Returns a pair of parallel lists ``(file_paths, file_image)``: the full path of
    each matching file, and the part of its file name before the first '.'.
    """
    matches = [
        (os.path.join(folder, name), name.split('.')[0])
        for folder, _subdirs, names in os.walk(directory)
        for name in names
        if '.jpg' in name
    ]

    file_paths = [full_path for full_path, _stem in matches]
    file_image = [stem for _full_path, stem in matches]
    return file_paths, file_image

In [None]:
class DataGeneratorDicom(tf.keras.utils.Sequence):
    """Keras Sequence yielding batches of histogram-equalised DICOM images with labels.

    Each file is read with pydicom, inverted when it is MONOCHROME1, passed through
    ``exposure.equalize_hist``, resized by the ImagerPixelSpacing factors and finally
    resized to ``img_size`` x ``img_size``.

    NOTE(review): ``pydicom`` must be importable in the notebook namespace before a
    batch is requested; the import cell for it is currently commented out.
    """

    def __init__(self, df_X, arr_Y, batch_size=32, num_classes=None,shuffle=False):
        """
        Args:
            df_X: DataFrame whose first column holds the DICOM file paths.
            arr_Y: labels aligned row-by-row (by position) with ``df_X``.
            batch_size: number of samples per batch.
            num_classes: stored on the instance; not used by this class.
            shuffle: when true, sample order is reshuffled at the end of every epoch.
        """
        self.batch_size = batch_size
        self.df_X = df_X
        self.arr_Y = arr_Y
        self.num_classes = num_classes
        self.indices = self.df_X.index.tolist()
        self.shuffle = shuffle
        self.fix_monochrome = True
        self.on_epoch_end()

    def __len__(self):
        """Number of full batches per epoch; a trailing partial batch is dropped."""
        return int(np.floor(len(self.indices) / self.batch_size))

    def __getitem__(self, index):
        """Return the ``(X, y)`` arrays for batch number ``index``."""
        # self.index holds row *positions* (optionally shuffled); pass them straight
        # through so lookups do not depend on the DataFrame's index labels.
        positions = self.index[index * self.batch_size:(index + 1) * self.batch_size]
        return self.__get_data(positions)

    def n(self):
        """Total number of samples."""
        return len(self.indices)

    def on_epoch_end(self):
        """Reset the sample order, shuffling it when ``shuffle`` is enabled."""
        self.index = np.arange(len(self.indices))
        if self.shuffle:
            np.random.shuffle(self.index)

    def __get_data(self, batch):
        """Load, preprocess and stack the samples at the given row positions."""
        labels = np.asarray(self.arr_Y)
        X1 = []
        y = []

        for pos in batch:
            # First column of the frame is the file path.
            file = self.df_X.iloc[pos, 0]

            dicom = pydicom.read_file(file)

            resize_factor_rows = round(dicom.Rows * dicom.ImagerPixelSpacing[0])
            resize_factor_cols = round(dicom.Columns * dicom.ImagerPixelSpacing[1])

            data = dicom.pixel_array

            # depending on this value, X-ray may look inverted - fix that:
            if self.fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
                data = np.amax(data) - data

            data = exposure.equalize_hist(data)

            img = array_to_img(data.reshape(data.shape[0], data.shape[1], 1))
            # Image.LANCZOS is the same filter as the former Image.ANTIALIAS alias,
            # which newer Pillow releases no longer provide.
            img = img.resize((resize_factor_cols, resize_factor_rows), resample=Image.LANCZOS, reducing_gap=3.0)
            img = img.resize((img_size, img_size), resample=Image.LANCZOS, reducing_gap=3.0)
            img = img_to_array(img)
            #img /= 255.

            X1.append(img)
            y.append(labels[pos])

        # reshape(-1, 1) yields one label per row regardless of how many samples were loaded.
        return np.array(X1), np.array(y).reshape(-1, 1)

In [None]:
class DataGeneratorlut(tf.keras.utils.Sequence):
    """Keras Sequence yielding batches of DICOM images windowed through a linear VOI LUT.

    Each file is read with pydicom, mapped to 0-255 with ``make_lut``/``apply_lut`` using
    a window spanning the image's full pixel range, resized by the ImagerPixelSpacing
    factors and finally resized to ``img_size`` x ``img_size``.

    NOTE(review): ``pydicom`` must be importable in the notebook namespace before a
    batch is requested; the import cell for it is currently commented out.
    """

    def __init__(self, df_X, arr_Y, batch_size=32, num_classes=None,shuffle=False, mode='train'):
        """
        Args:
            df_X: DataFrame whose first column holds the DICOM file paths.
            arr_Y: labels aligned row-by-row (by position) with ``df_X``.
            batch_size: number of samples per batch.
            num_classes: stored on the instance; not used by this class.
            shuffle: when true, sample order is reshuffled at the end of every epoch.
            mode: stored on the instance; this class applies no mode-specific processing.
        """
        self.batch_size = batch_size
        self.df_X = df_X
        self.arr_Y = arr_Y
        self.num_classes = num_classes
        self.indices = self.df_X.index.tolist()
        self.shuffle = shuffle
        self.fix_monochrome = True
        self.mode = mode
        self.on_epoch_end()

    def __len__(self):
        """Number of full batches per epoch; a trailing partial batch is dropped."""
        return int(np.floor(len(self.indices) / self.batch_size))

    def __getitem__(self, index):
        """Return the ``(X, y)`` arrays for batch number ``index``."""
        # self.index holds row *positions* (optionally shuffled); pass them straight
        # through so lookups do not depend on the DataFrame's index labels.
        positions = self.index[index * self.batch_size:(index + 1) * self.batch_size]
        return self.__get_data(positions)

    def n(self):
        """Total number of samples."""
        return len(self.indices)

    def on_epoch_end(self):
        """Reset the sample order, shuffling it when ``shuffle`` is enabled."""
        self.index = np.arange(len(self.indices))
        if self.shuffle:
            np.random.shuffle(self.index)

    def __get_data(self, batch):
        """Load, window, resize and stack the samples at the given row positions."""
        labels = np.asarray(self.arr_Y)
        X1 = []
        y = []

        for pos in batch:
            # First column of the frame is the file path.
            file = self.df_X.iloc[pos, 0]

            dicom = pydicom.read_file(file)

            resize_factor_rows = round(dicom.Rows * dicom.ImagerPixelSpacing[0])
            resize_factor_cols = round(dicom.Columns * dicom.ImagerPixelSpacing[1])

            # Read the pixel data once and remember its 2-D shape for the reshape below.
            data = dicom.pixel_array
            n_rows, n_cols = data.shape[0], data.shape[1]

            # Cast to Python ints first: summing two fixed-width numpy scalars
            # (e.g. uint16) can wrap around before the division.
            minPixel = int(np.min(data))
            maxPixel = int(np.max(data))
            windowWidth = maxPixel - minPixel
            windowLevel = (minPixel + maxPixel) / 2

            lut = make_lut(data, windowWidth, windowLevel, dicom.PhotometricInterpretation)
            data = np.array(apply_lut(data, lut))

            img = array_to_img(data.reshape(n_rows, n_cols, 1))
            # Image.LANCZOS is the same filter as the former Image.ANTIALIAS alias,
            # which newer Pillow releases no longer provide.
            img = img.resize((resize_factor_cols, resize_factor_rows), resample=Image.LANCZOS, reducing_gap=3.0)
            img = img.resize((img_size, img_size), resample=Image.LANCZOS, reducing_gap=3.0)
            img = img_to_array(img)
            #img /= 255.

            X1.append(img)
            y.append(labels[pos])

        # reshape(-1, 1) yields one label per row regardless of how many samples were loaded.
        return np.array(X1), np.array(y).reshape(-1, 1)

In [None]:
class DataGeneratorjpg(tf.keras.utils.Sequence):
    """Keras Sequence yielding batches of grayscale JPEG images with labels.

    In ``mode='train'`` each image receives one random augmentation: a horizontal
    flip, a blur, or a rotation by an angle drawn from ``self.angles`` (several of
    which are 0, i.e. no change). Any other mode returns the images unmodified.
    """

    def __init__(self, df_X, arr_Y, batch_size=32, num_classes=None,shuffle=False,mode='train'):
        """
        Args:
            df_X: DataFrame whose first column holds the image file paths.
            arr_Y: labels aligned row-by-row (by position) with ``df_X``.
            batch_size: number of samples per batch.
            num_classes: stored on the instance; not used by this class.
            shuffle: when true, sample order is reshuffled at the end of every epoch.
            mode: 'train' enables random augmentation; anything else disables it.
        """
        self.batch_size = batch_size
        self.df_X = df_X
        self.arr_Y = arr_Y
        self.num_classes = num_classes
        self.indices = self.df_X.index.tolist()
        self.shuffle = shuffle
        self.fix_monochrome = True
        #self.angles = [-10,-5,-2,0,0,0,0,2,5,10]
        # Candidate rotation angles in degrees; repeated zeros make "no rotation" more likely.
        self.angles = [-10,-5,-3,-1,0,0,0,0,1,3,5,10]
        self.mode = mode
        self.on_epoch_end()

    def __len__(self):
        """Number of full batches per epoch; a trailing partial batch is dropped."""
        return int(np.floor(len(self.indices) / self.batch_size))

    def __getitem__(self, index):
        """Return the ``(X, y)`` arrays for batch number ``index``."""
        # self.index holds row *positions* (optionally shuffled); pass them straight
        # through so lookups do not depend on the DataFrame's index labels.
        positions = self.index[index * self.batch_size:(index + 1) * self.batch_size]
        return self.__get_data(positions)

    def n(self):
        """Total number of samples."""
        return len(self.indices)

    def on_epoch_end(self):
        """Reset the sample order, shuffling it when ``shuffle`` is enabled."""
        self.index = np.arange(len(self.indices))
        if self.shuffle:
            np.random.shuffle(self.index)

    def __augment(self, img):
        """Apply one randomly chosen augmentation to a PIL image and return the result."""
        # 14 equally likely slots: 2 flip, 2 blur, 10 rotate.
        rand_inx = randrange(0, 14)

        if rand_inx in [1, 12]:
            return img.transpose(Image.FLIP_LEFT_RIGHT)
        if rand_inx in [0, 13]:
            return img.filter(ImageFilter.BLUR)

        # Draw the angle independently of the slot number so that every entry of
        # self.angles (including the first two) can actually be selected.
        deg = round(self.angles[randrange(len(self.angles))], 0)
        if deg != 0:
            img = img.rotate(deg, expand=False)
        return img

    def __get_data(self, batch):
        """Load, optionally augment and stack the samples at the given row positions."""
        labels = np.asarray(self.arr_Y)
        X = []
        y = []

        for pos in batch:
            # First column of the frame is the file path.
            file = self.df_X.iloc[pos, 0]
            img = load_img(file, color_mode="grayscale")

            if self.mode == 'train':
                img = self.__augment(img)

            # No channel axis is added here; the array keeps whatever shape PIL yields
            # for the grayscale image (presumably (H, W) — confirm).
            X.append(np.asarray(img).astype(np.float32))
            y.append(labels[pos])

        # reshape(-1, 1) yields one label per row regardless of how many samples were loaded.
        return np.array(X), np.array(y).reshape(-1, 1)

In [None]:
# Load the study-level and image-level label tables from the mounted Drive.
# NOTE(review): the paths are relative, so this presumably assumes the working
# directory is /content — confirm.
train_study_level = pd.read_csv('drive/MyDrive/data/train_study_level.csv')
train_image_level = pd.read_csv('drive/MyDrive/data/train_image_level.csv')

In [None]:
# Derive the merge key: the part of `id` before the first '_' (e.g. '00086460a852_study' -> '00086460a852').
train_study_level['StudyInstanceUID'] = train_study_level['id'].apply(lambda x: x.split('_')[0])

In [None]:
# Inner-join study-level and image-level rows on StudyInstanceUID; the clashing
# `id` columns come out as id_x (study) and id_y (image).
train = train_study_level.merge(train_image_level, how='inner', on='StudyInstanceUID')

In [None]:
# Placeholder `boxes` string (a single 1x1 box at the origin) written into rows labelled 'none'.
none_label = "[{'x':0, 'y':0, 'width':1,'height':1}]"

In [None]:
# Give every row whose label mentions 'none' the placeholder box string.
# Columns are selected by name ('label' / 'boxes' were positions 8 / 7 in the merged
# frame) so the assignment does not depend on column order or on the index labels;
# rows with a missing label are left untouched.
train.loc[train['label'].str.contains('none', regex=False, na=False), 'boxes'] = none_label

In [None]:
# Number of boxes per image: parse the `boxes` string as a Python literal and take its length.
train['boxlen'] = train['boxes'].apply(lambda x: len(ast.literal_eval(x)))

In [None]:
# Binary target: 0 when the first space-separated token of `label` is 'none', otherwise 1.
train['target'] = train['label'].apply(lambda x: 0 if (x.split(' ')[0] == 'none') else 1)

In [None]:
# Walk the train300 folder and collect each .jpg file's full path and file-name stem.
train_file_path, train_image_id = get_filepaths('drive/MyDrive/data/train300')

In [None]:
# Two-column lookup table: file-name stem (image_id) -> full path on Drive.
train_image = pd.concat([pd.Series(train_image_id), pd.Series(train_file_path)], ignore_index=True,axis=1)
train_image.columns = ['image_id', 'path']

In [None]:
# Image key: the part of id_y before the first '_' (e.g. '65761e66de9f_image' -> '65761e66de9f').
train['image_id'] = train['id_y'].apply(lambda x: x.split('_')[0])

In [None]:
# Attach the file path to every labelled image; rows without a matching file on disk are dropped (inner join).
train = train.merge(train_image, how='inner', on='image_id')

In [None]:
# Preview the first rows of the assembled training table.
train.head()

Unnamed: 0,id_x,Negative for Pneumonia,Typical Appearance,Indeterminate Appearance,Atypical Appearance,StudyInstanceUID,id_y,boxes,label,boxlen,target,image_id,path
0,00086460a852_study,0,1,0,0,00086460a852,65761e66de9f_image,"[{'x': 720.65215, 'y': 636.51048, 'width': 332...",opacity 1 720.65215 636.51048 1052.84563 1284....,2,1,65761e66de9f,drive/MyDrive/data/train300/00086460a852/9e830...
1,000c9c05fd14_study,0,0,0,1,000c9c05fd14,51759b5579bc_image,"[{'x':0, 'y':0, 'width':1,'height':1}]",none 1 0 0 1 1,1,0,51759b5579bc,drive/MyDrive/data/train300/000c9c05fd14/e5554...
2,00292f8c37bd_study,1,0,0,0,00292f8c37bd,f6293b1c49e2_image,"[{'x':0, 'y':0, 'width':1,'height':1}]",none 1 0 0 1 1,1,0,f6293b1c49e2,drive/MyDrive/data/train300/00292f8c37bd/73120...
3,005057b3f880_study,1,0,0,0,005057b3f880,3019399c31f4_image,"[{'x':0, 'y':0, 'width':1,'height':1}]",none 1 0 0 1 1,1,0,3019399c31f4,drive/MyDrive/data/train300/005057b3f880/e34af...
4,0051d9b12e72_study,0,0,0,1,0051d9b12e72,bb4b1da810f3_image,"[{'x': 812.54698, 'y': 1376.41291, 'width': 62...",opacity 1 812.54698 1376.41291 1435.14793 1806...,2,1,bb4b1da810f3,drive/MyDrive/data/train300/0051d9b12e72/152f6...


In [None]:
# 80/20 train/test split of (path, target) with a fixed seed; no stratification is requested.
X_train, X_test, y_train, y_test = train_test_split(train['path'],train['target'], test_size=0.2, random_state=1)

In [None]:
# The generators read the path from the first column of a DataFrame, so wrap each Series in a frame.
X_train = X_train.to_frame()
X_test  = X_test.to_frame()

In [None]:
# Renumber rows 0..n-1 after the shuffle done by the split; the generators look rows up by integer position.
X_train = X_train.reset_index(drop=True)
X_test  = X_test.reset_index(drop=True)

In [None]:
# Renumber the labels the same way so they stay aligned with the X frames.
y_train = y_train.reset_index(drop=True)
y_test  = y_test.reset_index(drop=True)

In [None]:
# Batch size for the training generator (the test generator below is given its own value).
batch_size = 32

In [None]:
# Training batches: shuffled every epoch, with random augmentation (mode='train').
# Test batches: fixed order, batch size 64, no augmentation (mode='test').
train_generator = DataGeneratorjpg(df_X=X_train, arr_Y=y_train, batch_size=batch_size, shuffle=True, mode='train')
test_generator = DataGeneratorjpg(df_X=X_test, arr_Y=y_test, batch_size=64,mode='test')

In [None]:
def createmodel_start():
    """Build an EfficientNetB0 backbone (no pretrained weights, single-channel input) with a dense head.

    Head, in order: dropout -> 10x10 max-pool -> flatten -> dropout -> Dense(1000, gelu)
    -> dropout -> Dense(500, gelu) -> Dense(1, sigmoid). Every dense kernel carries an
    L2(0.01) regulariser. Returns the uncompiled ``tf.keras.Model``.
    """
    kl = tf.keras.layers
    gelu = tf.keras.activations.gelu
    l2 = tf.keras.regularizers.l2

    backbone = tf.keras.applications.EfficientNetB0(
        include_top=False,
        weights=None,
        input_tensor=None,
        input_shape=(img_size, img_size, 1),
    )

    # Start from the output of the backbone's last layer and stack the head on top.
    x = backbone.layers[-1].output
    x = kl.Dropout(0.3)(x)
    x = kl.MaxPooling2D(pool_size=(10, 10))(x)
    x = kl.Flatten(name="flatten")(x)
    x = kl.Dropout(0.3)(x)

    x = kl.Dense(1000, activation=gelu, kernel_regularizer=l2(0.01))(x)
    x = kl.Dropout(0.3, name='dp01')(x)
    x = kl.Dense(500, activation=gelu, kernel_regularizer=l2(0.01))(x)

    prediction = kl.Dense(1, activation="sigmoid", kernel_regularizer=l2(0.01))(x)

    return tf.keras.Model(inputs=backbone.input, outputs=prediction)

In [None]:
def createmodel():
    """Build an EfficientNetB0 backbone (no pretrained weights, single-channel input) with a minimal head.

    Head: global average pooling followed by a single sigmoid unit with an L2(0.01)
    kernel regulariser. Returns the uncompiled ``tf.keras.Model``.
    """
    backbone = tf.keras.applications.EfficientNetB0(
        include_top=False,
        weights=None,
        input_tensor=None,
        input_shape=(img_size, img_size, 1),
    )

    # Pool the output of the backbone's last layer down to one feature vector per image.
    features = backbone.layers[-1].output
    pooled = tf.keras.layers.GlobalAveragePooling2D()(features)

    prediction = tf.keras.layers.Dense(
        1,
        activation="sigmoid",
        kernel_regularizer=tf.keras.regularizers.l2(0.01),
    )(pooled)

    return tf.keras.Model(inputs=backbone.input, outputs=prediction)

In [None]:
#model = createmodel_start()

In [None]:
# Resume from a previously saved full-model checkpoint instead of building a fresh
# model (the createmodel_start() call above is commented out).
model = tf.keras.models.load_model('drive/MyDrive/data/tmp/checkpoint_300_0_5663_0_7426.h5')

In [None]:
# Optimiser hyper-parameters for this training run.
learning_rate = 0.0001
weight_decay = 0.00001

# AdamW (Adam with decoupled weight decay) from tensorflow-addons.
optimizer = tfa.optimizers.AdamW(learning_rate=learning_rate, weight_decay=weight_decay)

# Compile the loaded model for binary classification, tracking binary accuracy.
model.compile(optimizer=optimizer,
              loss=tf.keras.losses.binary_crossentropy,
              metrics=[tf.keras.metrics.BinaryAccuracy()])

# Save the whole model (not only the weights) each time validation accuracy reaches a new maximum.
checkpoint_filepath = "drive/MyDrive/data/tmp/checkpoint_x.h5"
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_filepath,monitor="val_binary_accuracy", mode='max', save_best_only=True,save_weights_only=False, verbose=2)

# Stop after 10 epochs without a validation-accuracy improvement and restore the best weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_binary_accuracy', mode='max', verbose=0, patience=10, restore_best_weights=True)

In [None]:
# Number of full batches each generator provides per epoch (sample count // batch size).
STEP_SIZE_TRAIN=train_generator.n()//train_generator.batch_size
STEP_SIZE_VALID=test_generator.n()//test_generator.batch_size

# Train for up to 100 epochs; checkpointing and early stopping are handled by the callbacks.
history = model.fit(x=train_generator,
                        steps_per_epoch=STEP_SIZE_TRAIN,
                        validation_data=test_generator,
                        validation_steps=STEP_SIZE_VALID, 
                        epochs=100,
                        callbacks=[checkpoint_callback,early_stopping], 
                        shuffle=True)

Epoch 1/100

Epoch 00001: val_binary_accuracy did not improve from 0.74095
Epoch 2/100

Epoch 00002: val_binary_accuracy improved from 0.74095 to 0.74671, saving model to drive/MyDrive/data/tmp/checkpoint_x.h5
Epoch 3/100

Epoch 00003: val_binary_accuracy improved from 0.74671 to 0.74753, saving model to drive/MyDrive/data/tmp/checkpoint_x.h5
Epoch 4/100

Epoch 00004: val_binary_accuracy did not improve from 0.74753
Epoch 5/100

Epoch 00005: val_binary_accuracy did not improve from 0.74753
Epoch 6/100

Epoch 00006: val_binary_accuracy did not improve from 0.74753
Epoch 7/100

Epoch 00007: val_binary_accuracy did not improve from 0.74753
Epoch 8/100

Epoch 00008: val_binary_accuracy improved from 0.74753 to 0.74836, saving model to drive/MyDrive/data/tmp/checkpoint_x.h5
Epoch 9/100

Epoch 00009: val_binary_accuracy did not improve from 0.74836
Epoch 10/100

Epoch 00010: val_binary_accuracy did not improve from 0.74836
Epoch 11/100

Epoch 00011: val_binary_accuracy did not improve from 0

In [None]:
# 875s 346ms/step - loss: 0.5229 - binary_accuracy: 0.7594 - val_loss: 0.5029 - val_binary_accuracy: 0.7706
# 97s 611ms/step - loss: 0.4464 - binary_accuracy: 0.8012 - val_loss: 0.5649 - val_binary_accuracy: 0.7508
# 96s 608ms/step - loss: 0.4438 - binary_accuracy: 0.8202 - val_loss: 0.5663 - val_binary_accuracy: 0.7426