In [1]:
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from PIL import Image
import hsn_v1
import keras
import csv
import tensorflow as tf
from matplotlib import pyplot as plt

Using TensorFlow backend.


In [2]:
# Directory that holds the cross-validation split CSV files; it is searched
# recursively for '*.csv' in the next cell.
folds_path = 'folds/glas/split_0/'

In [3]:
# Every CSV file found (recursively) under the folds directory, as plain strings.
folds_files = list(map(str, Path(folds_path).rglob('*.csv')))

In [4]:
def make_filter(pattern):
    """Build a predicate that tells whether `pattern` occurs in a file path.

    Args:
        pattern: substring to look for.

    Returns:
        A one-argument function returning True when `pattern` is contained
        in its argument and False otherwise.
    """
    def contains_pattern(file):
        # Membership test already yields the boolean we want.
        return pattern in file
    return contains_pattern

### Collect and sort the cross-validation (CV) CSV files

In [5]:
# Validation split files, in lexicographic order so fold indices line up across splits.
valid_csv = sorted(filter(make_filter('valid'), folds_files))

In [6]:
# Test split files, in lexicographic order so fold indices line up across splits.
test_csv = sorted(filter(make_filter('test'), folds_files))

In [7]:
# Training split files, in lexicographic order so fold indices line up across splits.
train_csv = sorted(filter(make_filter('train'), folds_files))

#### Get file names

In [8]:
import pandas as pd

In [9]:
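# Earlier variant kept for reference: it maps 'name.bmp' to the prefix 'name_'
# instead of to 'name.png' (presumably to match patch file names - confirm).
# def read_csv(file):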
#     cols = [0,1,2]
#     col_names = ['img', 'gt', 'class']    
#     df = pd.read_csv(file, header=None, usecols=cols, names=col_names)
#     files = df[col_names[0]].tolist()
#     names = [f.replace('.bmp', '') + '_' for f in files]
#     return names

In [10]:
def read_csv(file):
    """Return the image file names listed in a header-less fold CSV.

    Only the first three columns are read (named 'img', 'gt' and 'class');
    the 'img' column is returned with every '.bmp' replaced by '.png'.

    Args:
        file: path or buffer accepted by `pandas.read_csv`.

    Returns:
        list of str, one entry per CSV row.
    """
    columns = ['img', 'gt', 'class']
    table = pd.read_csv(file, header=None, usecols=[0, 1, 2], names=columns)
    return [entry.replace('.bmp', '.png') for entry in table['img'].tolist()]

In [11]:
# One list of image names per fold CSV, for each of the three splits.
train_files = list(map(read_csv, train_csv))
test_files = list(map(read_csv, test_csv))
valid_files = list(map(read_csv, valid_csv))

#### Find patches

In [12]:
# Root directory that is searched recursively for '*.png' images in the next cell.
# Alternative image root, currently disabled:
# imgs_path = 'img/02_glas_patch'
imgs_path = 'img/02_glas_full'

In [13]:
# Every PNG found (recursively) under the image root, as plain strings.
glas_paths = list(map(str, Path(imgs_path).rglob('*.png')))

In [14]:
def get_patches_files(folds, all_files):
    """Collect, for every fold, the image paths that belong to it.

    A path belongs to a fold when at least one of the fold's names occurs in
    it as a substring.

    Args:
        folds: iterable of folds, each an iterable of name strings.
        all_files: iterable of candidate file paths (strings).

    Returns:
        list with one entry per fold; each entry is a list of the matching
        paths, de-duplicated and sorted so the order is identical on every
        run (iterating a set of strings depends on the interpreter's hash
        seed, which would otherwise reshuffle the images per kernel restart).
    """
    all_files = list(all_files)  # allow generators; we iterate once per fold
    out = []
    for fold in folds:
        names = list(fold)
        matches = {f for f in all_files if any(name in f for name in names)}
        out.append(sorted(matches))
    return out

In [15]:
# Image paths per training fold.
train_patches = get_patches_files(folds=train_files, all_files=glas_paths)

In [16]:
# Image paths per test fold.
test_patches = get_patches_files(folds=test_files, all_files=glas_paths)

In [17]:
# Image paths per validation fold.
val_patches = get_patches_files(folds=valid_files, all_files=glas_paths)

In [18]:
# Sanity check: number of matched images in each training fold.
for fold_size in map(len, train_patches):
    print(fold_size)

67
67
67
67
72


### Load model

In [19]:
# Switch that selects the model name and the weights file used below, and is
# passed as `pretrained` to hsn.load_histonet.
IS_FINETUNE = False

In [20]:
# Name of the HistoNet model to load, depending on the fine-tuning switch.
MODEL_NAME = 'histonet_X1.7_clrdecay_5' if IS_FINETUNE else 'histonet_glas'

MODEL_NAME

'histonet_glas'

In [21]:
# Run configuration forwarded to hsn_v1.HistoSegNetV1 in the next cell.
# The trailing comment on each option lists the values it accepts.
INPUT_NAME = '02_glas_full'
INPUT_MODE = 'patch'                    # {'patch', 'wsi'}
INPUT_SIZE = [224, 224]                 # [<int>, <int>] > 0
HTT_MODE = 'glas'                       # {'both', 'morph', 'func', 'glas'}
BATCH_SIZE = 1                          # int > 0
GT_MODE = 'on'                          # {'on', 'off'}
RUN_LEVEL = 3                           # {1: HTT confidence scores, 2: Grad-CAMs, 3: Segmentation masks}
SAVE_TYPES = [1, 1, 1, 1]               # {HTT confidence scores, Grad-CAMs, Segmentation masks, Summary images}
VERBOSITY = 'QUIET'                    # {'NORMAL', 'QUIET'}
# Settings for image set
# NOTE(review): the unit of the pixel resolutions is not stated here
# (presumably microns per pixel) - confirm.
IN_PX_RESOL = 0.620
OUT_PX_RESOL = 0.25 * 1088 / 224    # 1.21428571429
# Ratio of output to input pixel resolution (about 1.96 with the values above).
DOWNSAMPLE_FACTOR = OUT_PX_RESOL / IN_PX_RESOL

In [22]:
# Instantiate the HistoSegNet wrapper with the run configuration defined above.
hsn = hsn_v1.HistoSegNetV1(params=dict(
    input_name=INPUT_NAME,
    input_size=INPUT_SIZE,
    input_mode=INPUT_MODE,
    down_fac=DOWNSAMPLE_FACTOR,
    batch_size=BATCH_SIZE,
    htt_mode=HTT_MODE,
    gt_mode=GT_MODE,
    run_level=RUN_LEVEL,
    save_types=SAVE_TYPES,
    verbosity=VERBOSITY,
))

In [23]:
# Load the HistoNet model selected by MODEL_NAME; `pretrained` follows the fine-tuning switch.
hsn.load_histonet(params=dict(model_name=MODEL_NAME), pretrained=IS_FINETUNE)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [24]:
# Keep a handle on the HistoNet object held by `hsn` and print its model's layer summary.
histonet = hsn.hn
histonet.model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 224, 224, 64)      1792      
_________________________________________________________________
activation_1 (Activation)    (None, 224, 224, 64)      0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 224, 224, 64)      256       
_________________________________________________________________
dropout_1 (Dropout)          (None, 224, 224, 64)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 224, 224, 64)      36928     
_________________________________________________________________
activation_2 (Activation)    (None, 224, 224, 64)      0         
_________________________________________________________________
batch_normalization_2 (Batch (None, 224, 224, 64)      256       
__________

### Load images

In [25]:
def preprocess(x, y):
    """Randomly augment and normalise one (image, label) pair.

    Steps, in order: random 416x416 crop, resize to 224x224, random hue /
    saturation / brightness / contrast shifts, random rotation by a multiple
    of 90 degrees, then `histonet.normalize_image(..., is_glas=True)`.

    Args:
        x: image tensor with 3 channels; it must be at least 416x416 in
            height and width for the random crop to succeed.
        y: label; passed through untouched.

    Returns:
        Tuple `(x, y)` with the augmented, normalised image and the original label.
    """
    # Random crop and resize
    crop_size = [416, 416, 3]
    # tf.image.resize expects only the spatial size [new_height, new_width];
    # the channel dimension is carried over from the input. Passing three
    # elements is rejected by TensorFlow.
    resize_size = [224, 224]
    x = tf.image.random_crop(x, crop_size)
    x = tf.image.resize(x, resize_size)

    # Color shifts
    # NOTE(review): these ops are usually applied to float images scaled to
    # [0, 1]; confirm the value range of the incoming images.
    x = tf.image.random_hue(x, 0.5)
    x = tf.image.random_saturation(x, 0.5, 1.5)
    x = tf.image.random_brightness(x, 0.5)
    x = tf.image.random_contrast(x, 0.5, 1.5)

    # Random rotation: k is drawn uniformly from {0, 1, 2, 3} quarter turns
    x = tf.image.rot90(x, tf.random_uniform(shape=[], minval=0, maxval=4, dtype=tf.int32))

    # Normalize
    x = histonet.normalize_image(x, is_glas=True)

    return x, y

In [26]:
def _stack_folds(arrays):
    """Combine per-fold arrays into one outer array indexed by fold."""
    if len({arr.shape for arr in arrays}) <= 1:
        # All folds have the same shape (or there are none): regular ndarray.
        return np.array(arrays)
    # Folds of different sizes cannot form a rectangular array; build the
    # object array explicitly instead of relying on implicit ragged handling,
    # which recent NumPy versions reject.
    stacked = np.empty(len(arrays), dtype=object)
    for idx, arr in enumerate(arrays):
        stacked[idx] = arr
    return stacked


def load_images(folds):
    """Load every image of every fold and build the matching label arrays.

    Each image is converted to RGB and, when its size differs from
    775x522 (width x height), rescaled to that size so that all images of a
    fold fit into one array.

    Args:
        folds: iterable of folds, each a sequence of image file paths.

    Returns:
        Tuple `(X, Y)` indexed by fold:
        X[i] has shape (len(fold_i), 522, 775, 3) and holds the pixel values;
        Y[i] has shape (len(fold_i), 51) with a 1 in column 48 of every row.
        When all folds have the same length X and Y are regular arrays,
        otherwise they are 1-D object arrays of per-fold arrays.
    """
    X = []
    Y = []

    GO_INDEX = 48
    NUM_CLASSES = 51
    IMG_HEIGHT, IMG_WIDTH = 522, 775

    for fold in folds:
        imgs = np.zeros((len(fold), IMG_HEIGHT, IMG_WIDTH, 3))
        for i, f in enumerate(fold):
            # Context manager closes the underlying file once the pixels are read.
            with Image.open(f) as handle:
                rgb = handle.convert('RGB')
                # PIL reports size as (width, height). np.resize would only
                # repeat/truncate the flat pixel buffer, so differently sized
                # images are properly rescaled here instead.
                if rgb.size != (IMG_WIDTH, IMG_HEIGHT):
                    rgb = rgb.resize((IMG_WIDTH, IMG_HEIGHT), Image.BILINEAR)
                imgs[i] = np.asarray(rgb, dtype="int32")

        X.append(imgs)

        # Create labels, only class is G.O
        y = np.zeros((len(imgs), NUM_CLASSES))
        y[:, GO_INDEX] = 1
        Y.append(y)

    return _stack_folds(X), _stack_folds(Y)

In [None]:
# Pixel and label arrays per fold, for each of the three splits.
X_train_folds, Y_train_folds = load_images(folds=train_patches)
X_test_folds, Y_test_folds = load_images(folds=test_patches)
X_val_folds, Y_val_folds = load_images(folds=val_patches)

In [None]:
def load_datasets(X_folds, Y_folds):
    """Build one augmented `tf.data.Dataset` per fold.

    For every fold the image/label arrays are sliced into individual
    examples and `preprocess` is mapped over them. The shapes of each
    fold's arrays are printed as a sanity check.

    Args:
        X_folds: sequence of per-fold image arrays.
        Y_folds: sequence of per-fold label arrays, aligned with `X_folds`.

    Returns:
        list of datasets, one per fold, in fold order (empty when no folds
        are given).
    """
    dataset_folds = []

    for X, Y in zip(X_folds, Y_folds):
        print(X.shape, Y.shape)
        dataset = tf.data.Dataset.from_tensor_slices((X, Y))
        # Keep the mapped dataset; previously it was built and then dropped.
        dataset_folds.append(dataset.map(preprocess))

    return dataset_folds

In [None]:
# Run the dataset construction on the training folds (prints each fold's X/Y shapes).
load_datasets(X_train_folds, Y_train_folds)

### Train model

In [None]:
# Model object held by the HistoNet wrapper; this is what the training loop below fits.
model = histonet.model

In [None]:
# Weights file depends on whether we are fine-tuning.
weights_path = 'data/histonet_glas_ft.h5' if IS_FINETUNE else 'data/histonet_glas.h5'

# Save weights only, and only when the validation loss improves.
model_chkpt = keras.callbacks.ModelCheckpoint(
    filepath=weights_path,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
)
weights_path

In [None]:
# Training hyper-parameters used by the cross-validation loop below.
# Note that `batch_size` here is separate from BATCH_SIZE passed to HistoSegNetV1.
num_epochs = 30
batch_size = 8
# Number of cross-validation folds = length of the first axis of the training array.
num_folds = X_train_folds.shape[0]

In [None]:
# Train on every cross-validation fold in turn.
# NOTE(review): the same `model` object is fitted fold after fold without
# re-initialising its weights, and one checkpoint file is shared by all
# folds - confirm this is intended.
# NOTE(review): the arrays come straight from load_images (522x775 images),
# while the model summary above shows 224x224 feature maps - confirm the
# images are cropped/resized before they reach the model.
for i in range(num_folds):
    X_train, y_train = X_train_folds[i], Y_train_folds[i]
    X_test, y_test = X_test_folds[i], Y_test_folds[i]
    X_val, y_val = X_val_folds[i], Y_val_folds[i]

    # ImageDataGenerator is not imported by name in this notebook; reach it
    # through the already imported `keras` package.
    train_gen = keras.preprocessing.image.ImageDataGenerator(horizontal_flip=True, vertical_flip=True)

    train_generator = train_gen.flow(X_train, y_train, batch_size=batch_size)

    print(X_train.shape)
    print(X_val.shape)
    print(y_val.shape)

    # Whole number of batches needed to see every sample of THIS fold once
    # (integer ceiling division; folds may differ in size).
    steps_per_epoch = (len(X_train) + batch_size - 1) // batch_size

    model.fit_generator(train_generator,
                        epochs=num_epochs,
                        verbose=1,
                        shuffle=True,
                        callbacks=[model_chkpt],
                        validation_data=(X_val, y_val),
                        steps_per_epoch=steps_per_epoch)

#     model.fit(X_train, y_train, epochs=num_epochs, validation_data=(X_val, y_val), 
#               batch_size=4, callbacks=[model_chkpt])