In [1]:
from os import listdir, walk
from os.path import isfile, join
import numpy as np
from PIL import Image
import cv2
from IPython.display import display
import math, random
import time, datetime, sys, os, shutil, logging
import operator

import tensorflow as tf
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.layers import Input
from tensorflow.keras.utils import Sequence
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import *
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from scipy.ndimage.interpolation import shift
import pandas as pd

# Print the physical devices TensorFlow detects on this machine.
print(tf.config.experimental.list_physical_devices())

# Execute the companion notebooks so that the names they define become
# available to the cells below.
# NOTE(review): presumably these provide SAVED_MODELS_ROOT, DATASET_PATH,
# check_dirs, get_dataset, balance_dataset and cv2_clipped_zoom, none of
# which are defined in this notebook - confirm against those files.
%run ./variables.ipynb
%run ./utils.ipynb
%run ../utils/data_utils.ipynb

# NOTE(review): check_dirs is defined outside this notebook; presumably it
# makes sure the saved-models directory exists - confirm.
check_dirs(SAVED_MODELS_ROOT)

# Root logging setup: INFO-and-above records are written both to the file
# "info.log" and to stdout (i.e. the cell output), formatted "LEVEL - message".
logging.basicConfig(
    handlers=[logging.FileHandler("info.log"), logging.StreamHandler(sys.stdout)],
    format='%(levelname)s - %(message)s',
    level=logging.INFO,
)

# Named logger used by the rest of the notebook; emit one record right away
# so the configuration can be checked in the cell output.
logger = logging.getLogger('MAINLOGGER')
logger.info("First log")

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:XLA_CPU:0', device_type='XLA_CPU'), PhysicalDevice(name='/physical_device:XLA_GPU:0', device_type='XLA_GPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
INFO - First log


# Parameters

In [2]:
# --- Cross-validation layout ---
n_folds = 10          # number of stratified folds (StratifiedKFold n_splits)
val_split = 0.1       # fraction of each fold's train+val data held out for validation

# --- Training schedule ---
pretrain_epochs = 3   # epoch setting for the first phase (pretrained base frozen)
train_epochs = 20     # epoch setting for the second phase (all layers trainable)
BATCH_SIZE = 32       # batch size of the training generator

# --- Output ---
verbose = 0           # Keras verbosity passed to fit() and ModelCheckpoint

# Preparing Data Generators

In [3]:
def preproc(img):
    """Apply a random zoom followed by a random shift to one image.

    Used as the ``preprocessing_function`` of the training
    ``ImageDataGenerator``.

    Args:
        img: a single channels-last image (the shift is applied with
            ``row_axis=0, col_axis=1, channel_axis=2``).

    Returns:
        The zoomed and shifted image as returned by ``random_shift``.
    """
    # Zoom factor drawn uniformly from [0.80, 1.20).
    zoom_factor = np.random.uniform(80, 120) / 100
    zoomed = cv2_clipped_zoom(img, zoom_factor)

    # Two fractions drawn from {-0.30, -0.29, ..., 0.29}
    # (the upper bound of randint is exclusive).
    width_range, height_range = np.random.randint(-30, 30, 2) / 100

    # NOTE(review): random_shift documents its 2nd/3rd arguments as shift
    # *ranges* and draws the actual offset inside them itself, so the final
    # shift looks like the result of two nested random draws - confirm that
    # this is intended rather than passing a fixed maximum fraction.
    return tf.keras.preprocessing.image.random_shift(
        zoomed,
        width_range,
        height_range,
        row_axis=0,
        col_axis=1,
        channel_axis=2,
        fill_mode='nearest',
    )

# Training generator: rescales pixels to [0, 1] and augments with random
# rotation (up to 180 degrees), brightness jitter and horizontal/vertical
# flips. Shift and zoom are not configured here; they are applied by the
# custom `preproc` function instead.
train_datagen = ImageDataGenerator(
    data_format="channels_last",
    rescale=1./255.,
    preprocessing_function=preproc,
    rotation_range=180,
    brightness_range=[0.8, 1.2],
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)

# Plain generator without augmentation: rescaling only.
# NOTE(review): `test_val` is not referenced by the cross-validation cell in
# this notebook, which builds its validation generator from `test_datagen`.
test_val = ImageDataGenerator(
    data_format="channels_last",
    rescale=1./255.,
)

# Plain generator without augmentation, used for the validation and test sets.
test_datagen = ImageDataGenerator(
    data_format="channels_last",
    rescale=1./255.,
)

# Cross Validation

In [None]:
# Stratified k-fold cross-validation of an Xception-based classifier.
#
# For every fold:
#   1. the fold's train+val part is split again into train / validation,
#   2. the training part is passed through balance_dataset,
#   3. a fresh ImageNet-initialised Xception with a new classification head
#      is trained in two phases: head only (base frozen), then all layers,
#   4. the weights with the best validation loss are restored and the model
#      is evaluated on the fold's held-out test part.
# The mean and standard deviation of the per-fold test accuracy are logged.

IMAGE_SIZE = (256, 256)            # spatial size fed to the network
CHECKPOINT_PATH = "weights.hdf5"   # temporary best-weights file, removed after each fold

# NOTE(review): get_dataset is defined outside this notebook; X and y are
# indexed with index arrays below and X is used as the filename column of
# flow_from_dataframe, so presumably X holds image paths and y the labels.
X, y, _ = get_dataset(DATASET_PATH, ids=False)
logger.info("Initial dataset length: %s- n classes: %s", len(X), len(np.unique(y)))
start = time.time()
skf = StratifiedKFold(n_splits=n_folds)
# NOTE: despite their names these two lists collect the per-fold *test*
# loss / accuracy (see the end of the loop body); names kept for compatibility.
folds_val_loss = []
folds_val_acc = []
for fold, (train_index, test_index) in enumerate(skf.split(X, y), start=1):
    logger.info("Fold: %s/%s", fold, n_folds)

    # A new Xception is built in every fold; reset Keras' global state so
    # graph/layer state from previous folds does not pile up in memory.
    tf.keras.backend.clear_session()

    # PREPARING DATA
    X_trainval, X_test = X[train_index], X[test_index]
    y_trainval, y_test = y[train_index], y[test_index]
    X_train, X_val, y_train, y_val = train_test_split(
        X_trainval, y_trainval,
        test_size=val_split, random_state=42, stratify=y_trainval)
    # NOTE(review): balance_dataset is defined elsewhere; presumably it
    # equalises the number of samples per class - confirm.
    X_train, y_train, max_samples = balance_dataset(X_train, y_train, max_samples=None)
    df_train = pd.DataFrame(np.transpose([X_train, y_train]), columns=['X', 'y'])
    df_val = pd.DataFrame(np.transpose([X_val, y_val]), columns=['X', 'y'])
    df_test = pd.DataFrame(np.transpose([X_test, y_test]), columns=['X', 'y'])

    # PREPARING GENERATORS
    train_generator = train_datagen.flow_from_dataframe(
            dataframe=df_train,
            x_col='X',
            y_col='y',
            target_size=IMAGE_SIZE,
            batch_size=BATCH_SIZE,
            class_mode='categorical')
    # Reuse the class order of the training generator so that label indices
    # agree between the train, validation and test generators.
    class_names = list(train_generator.class_indices)
    val_generator = test_datagen.flow_from_dataframe(
            dataframe=df_val,
            x_col='X',
            y_col='y',
            target_size=IMAGE_SIZE,
            batch_size=1,
            classes=class_names,
            class_mode='categorical')
    test_generator = test_datagen.flow_from_dataframe(
            dataframe=df_test,
            x_col='X',
            y_col='y',
            target_size=IMAGE_SIZE,
            batch_size=1,
            classes=class_names,
            class_mode='categorical')
    train_spe = train_generator.samples // BATCH_SIZE
    # Validation / test generators use batch_size=1, so one step per sample.
    val_spe = val_generator.samples
    test_spe = test_generator.samples

    # PREPARING MODEL
    input_tensor = Input(shape=IMAGE_SIZE + (3,))
    base_model = Xception(include_top=False, weights='imagenet', input_tensor=input_tensor, pooling=None)
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    out = Dense(len(class_names), activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=out)
    # With save_best_only=True only the epoch with the best monitored value
    # (Keras default: val_loss) is kept on disk.
    checkpointer = ModelCheckpoint(filepath=CHECKPOINT_PATH, verbose=verbose, save_best_only=True)
    optimizer = "adam"

    # TRAINING - phase 1: only the new head is trained, the base is frozen.
    logger.info("Training new layers")
    for layer in base_model.layers:
        layer.trainable = False
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    history = model.fit(train_generator,
              epochs=pretrain_epochs,
              steps_per_epoch=train_spe,
              use_multiprocessing=False,
              validation_data=val_generator,
              validation_steps=val_spe,
              verbose=verbose,
              initial_epoch=0)

    # TRAINING - phase 2: every layer is unfrozen and fine-tuned.
    logger.info("Training full model")
    for layer in model.layers:
        layer.trainable = True
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    # Keras interprets `epochs` as the index of the final epoch, not as a
    # number of epochs to run. Continue counting where phase 1 stopped and
    # run exactly `train_epochs` further epochs. (A hard-coded initial_epoch
    # can silently shorten this phase, or skip it altogether so that no
    # checkpoint is ever written.)
    history = model.fit(train_generator,
              epochs=pretrain_epochs + train_epochs,
              steps_per_epoch=train_spe,
              use_multiprocessing=False,
              validation_data=val_generator,
              validation_steps=val_spe,
              callbacks=[checkpointer],
              verbose=verbose,
              initial_epoch=pretrain_epochs)
    # Restore the best checkpoint of phase 2, then discard the temporary file.
    model.load_weights(CHECKPOINT_PATH)
    os.remove(CHECKPOINT_PATH)

    # Getting result score
    logger.info("Evaluating!")
    logger.info("[val_loss, val_accuracy]: %s", model.evaluate(val_generator, steps=val_spe))
    fold_scores = model.evaluate(test_generator, steps=test_spe)
    logger.info("[test_loss, test_accuracy]: %s", fold_scores)
    folds_val_loss.append(fold_scores[0])
    folds_val_acc.append(fold_scores[1])

# Aggregate the per-fold test accuracy.
cv_score = np.mean(folds_val_acc)
cv_std = np.std(folds_val_acc)
time_elapsed = time.time() - start
logger.info("Mean accuracy: %s+-%s", cv_score, cv_std)
logger.info("Computed in %ss", time_elapsed)

Retrieving dataset from: /mnt/nvme-storage/pfauregi/training/thumbails/atlas/dataset/


'166/166'

INFO - Initial dataset length: 9895- n classes: 166
INFO - Fold: 1/10
Balanced to 149 samples per class!
Found 24734 validated image filenames belonging to 166 classes.
Found 891 validated image filenames belonging to 166 classes.
Found 990 validated image filenames belonging to 166 classes.
INFO - Training new layers
INFO - Training full model
