In [12]:
from os import listdir, walk
from os.path import isfile, join
import numpy as np
from PIL import Image
import cv2
from IPython.display import display
import math, random
import time, datetime, sys, os, shutil
import operator

import tensorflow as tf
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.layers import Input
from tensorflow.keras.utils import Sequence
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import *

from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.model_selection import train_test_split
import pandas as pd

#tf.config.optimizer.set_jit(True)
#tf.debugging.set_log_device_placement(True)
print(tf.config.experimental.list_physical_devices())

AUTOTUNE = tf.data.experimental.AUTOTUNE
BATCH_SIZE = 32

%run ./variables.ipynb
%run ./utils.ipynb
%run ../utils/data_utils.ipynb

id_map = get_selected_taxons(SELECTED_TAXONS)
id_map_inv = get_selected_taxons(SELECTED_TAXONS, inv=True)
n_classes = len(list(id_map.keys()))

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:XLA_CPU:0', device_type='XLA_CPU'), PhysicalDevice(name='/physical_device:XLA_GPU:0', device_type='XLA_GPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


# Handle data

## Preparing panda arrays

In [2]:
X, y, _ = get_dataset(ids=False)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("Original length:", len(X))
print("Test length:", len(X_train), "- n classes:", len(np.unique(y_train)))
print("Test length:", len(X_test), "- n classes:", len(np.unique(y_test)))
# Balance dataset
train_dict = {}
for file, label in zip(X_train, y_train):
    train_dict.setdefault(label, []).append(file)
max_samples = np.max([len(train_dict[taxon_id]) for taxon_id in train_dict])
X_train = []
y_train = []
for taxon_id in train_dict:
    ratio = np.ceil(max_samples/len(train_dict[taxon_id]))
    tmp = np.repeat(train_dict[taxon_id], ratio)
    np.random.shuffle(tmp)
    train_dict[taxon_id] = tmp[0:max_samples]
    X_train.extend(tmp[0:max_samples])
    y_train.extend([taxon_id]*max_samples)

'171/171'

Original length: 6144
Test length: 4915 - n classes: 171
Test length: 1229 - n classes: 167


In [3]:
data_train = {'png_path':  X_train, 'taxon_id': y_train}
data_test = {'png_path':  X_test, 'taxon_id': y_test}

df_train = pd.DataFrame(data_train, columns = ['png_path', 'taxon_id'])
df_test = pd.DataFrame(data_test, columns = ['png_path', 'taxon_id'])
df_train = df_train.sample(frac=1).reset_index(drop=True)
df_test = df_test.sample(frac=1).reset_index(drop=True)

# Prtining some infos
print(len(df_train), len(df_test))
df_train.head()
df_test.head()

20691 1229


Unnamed: 0,png_path,taxon_id
0,/mnt/nvme-storage/pfauregi/datasets/condensed/...,AUPU
1,/mnt/nvme-storage/pfauregi/datasets/condensed/...,FMOC
2,/mnt/nvme-storage/pfauregi/datasets/condensed/...,DMES
3,/mnt/nvme-storage/pfauregi/datasets/condensed/...,DSTE
4,/mnt/nvme-storage/pfauregi/datasets/condensed/...,CAGR


## Setting up the flows

In [4]:
datagen = ImageDataGenerator(rescale=1./255.,
                         rotation_range=90, 
                         brightness_range=[0.8,1.2], 
                         horizontal_flip=True, 
                         vertical_flip=True,
                         fill_mode='nearest',
                         width_shift_range=40,
                         height_shift_range=40,
                         zoom_range=0.2,
                         validation_split=0.2,
                         #featurewise_std_normalization=True,
                         data_format="channels_last")

classes_array = get_id_array(id_map)
train_generator = datagen.flow_from_dataframe(
        dataframe=df_train,
        x_col='png_path',
        y_col='taxon_id',
        target_size=(256, 256),
        batch_size=32,
        classes=classes_array,
        class_mode='categorical')
val_generator = datagen.flow_from_dataframe(
        dataframe=df_test,
        x_col='png_path',
        y_col='taxon_id',
        target_size=(256, 256),
        batch_size=32,
        classes=classes_array,
        class_mode='categorical')

train_spe = train_generator.samples // BATCH_SIZE
val_spe = val_generator.samples // BATCH_SIZE

Found 20691 validated image filenames belonging to 230 classes.
Found 1229 validated image filenames belonging to 230 classes.


## Testing

In [None]:
i = 0
stop = False
for batch in train_generator:
    images = batch[0]
    labels = batch[1]
    for i in range(images.shape[0]):
        print(np.argmax(labels[i]))
        image = (images[i,:,:,:]*255).astype('uint8')
        #print(image)
        display(Image.fromarray(image))
        i+=1
        if i>=1:
            stop = True
            break
    if stop: break;
    #display()

# Model desgin

In [5]:
# fetching base model
#base_model = Xception(include_top=False, weights='imagenet', input_shape=(256, 256, 3), pooling=None)
input_tensor = Input(shape=(256, 256, 3))
base_model = InceptionV3(weights='imagenet', input_tensor=input_tensor, include_top=False)

In [6]:
# setting model for specifiv case
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
out = Dense(230, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=out)

# Training

In [7]:
delete_all_files_in_folder(LOG_DIR)
delete_all_files_in_folder(SAVED_MODELS_ROOT)

## Setting callbacks

In [8]:
# Setting tensorboard
log_dir = LOG_DIR + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

In [9]:
filename="model-{epoch:02d}.hdf5"
checkpoint_callback = ModelCheckpoint(os.path.join(SAVED_MODELS_ROOT, filename), 
                             monitor='loss', 
                             verbose=1, 
                             save_best_only=True, 
                             mode='auto', 
                             save_freq='epoch')

## New layers only

In [10]:
n_epochs = 3
last_epoch = get_last_epoch(SAVED_MODELS_ROOT)

print(n_epochs, "epochs composed of", train_spe, "batches (steps) of", BATCH_SIZE, "images.")

for layer in base_model.layers:
    layer.trainable = False
    
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(train_generator, 
          epochs=last_epoch+n_epochs, 
          steps_per_epoch=train_spe,
          use_multiprocessing=True, 
          validation_data=val_generator,
          validation_steps=val_spe,
          callbacks=[tensorboard_callback, checkpoint_callback],
          initial_epoch=last_epoch)

3 epochs composed of 646 batches (steps) of 32 images.
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 646 steps, validate for 38 steps
Epoch 1/3

Epoch 00001: loss improved from inf to 2.63742, saving model to ./saved_models/model-01.hdf5
Epoch 2/3

Epoch 00002: loss improved from 2.63742 to 1.27900, saving model to ./saved_models/model-02.hdf5
Epoch 3/3

Epoch 00003: loss improved from 1.27900 to 1.05026, saving model to ./saved_models/model-03.hdf5


<tensorflow.python.keras.callbacks.History at 0x7f44841dbac8>

Process Keras_worker_ForkPoolWorker-2:
Process Keras_worker_ForkPoolWorker-4:
Process Keras_worker_ForkPoolWorker-6:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.5/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/usr/lib/python3.5/multiprocessing/pool.p

## Training 2 last inceptions blocks

In [13]:
n_epochs = 10
last_epoch = get_last_epoch(SAVED_MODELS_ROOT)

print(n_epochs, "epochs composed of", train_spe, "batches (steps) of", BATCH_SIZE, "images.")

# we chose to train the top 2 inception blocks, i.e. we will freeze
# the first 249 layers and unfreeze the rest:
for layer in model.layers[:249]:
    layer.trainable = False
for layer in model.layers[249:]:
    layer.trainable = True

# we need to recompile the model for these modifications to take effect
# we use SGD with a low learning rate
model.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['accuracy'])

# we train our model again (this time fine-tuning the top 2 inception blocks
# alongside the top Dense layers
model.fit(train_generator, 
          epochs=last_epoch+n_epochs, 
          steps_per_epoch=train_spe,
          use_multiprocessing=True, 
          validation_data=val_generator,
          validation_steps=val_spe,
          callbacks=[tensorboard_callback, checkpoint_callback],
          initial_epoch=last_epoch)

10 epochs composed of 646 batches (steps) of 32 images.
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 646 steps, validate for 38 steps
Epoch 4/13

Epoch 00004: loss improved from 1.05026 to 0.86657, saving model to ./saved_models/model-04.hdf5
Epoch 5/13

Epoch 00005: loss improved from 0.86657 to 0.47660, saving model to ./saved_models/model-05.hdf5
Epoch 6/13

Epoch 00006: loss improved from 0.47660 to 0.37727, saving model to ./saved_models/model-06.hdf5
Epoch 7/13

Epoch 00007: loss improved from 0.37727 to 0.32227, saving model to ./saved_models/model-07.hdf5
Epoch 8/13

Epoch 00008: loss improved from 0.32227 to 0.28202, saving model to ./saved_models/model-08.hdf5
Epoch 9/13

Epoch 00009: loss improved from 0.28202 to 0.25470, saving model to ./saved_models/model-09.hdf5
Epoch 10/13

Epoch 00010: loss improved from 0.25470 to 0.22262, saving model to ./saved_models/model-10.hdf5
Epoch 11/13

Epoch 00011: loss did not improve from 0.22262
Epoch 12/13

Epoch 00012: l

<tensorflow.python.keras.callbacks.History at 0x7f45654b5898>

Process Keras_worker_ForkPoolWorker-20:
Process Keras_worker_ForkPoolWorker-26:
Process Keras_worker_ForkPoolWorker-16:
Process Keras_worker_ForkPoolWorker-24:
Process Keras_worker_ForkPoolWorker-22:
Process Keras_worker_ForkPoolWorker-18:
Process Keras_worker_ForkPoolWorker-14:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
  File "/usr

## Training full model

In [None]:
n_epochs = 20
last_epoch = get_last_epoch(SAVED_MODELS_ROOT)

print(n_epochs, "epochs composed of", train_spe, "batches (steps) of", BATCH_SIZE, "images.")

# we chose to train the top 2 inception blocks, i.e. we will freeze
# the first 249 layers and unfreeze the rest:
for layer in model.layers:
    layer.trainable = True

# we need to recompile the model for these modifications to take effect
# we use SGD with a low learning rate
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# we train our model again (this time fine-tuning the top 2 inception blocks
# alongside the top Dense layers
model.fit(train_generator, 
          epochs=last_epoch+n_epochs, 
          steps_per_epoch=train_spe,
          use_multiprocessing=True, 
          validation_data=val_generator,
          validation_steps=val_spe,
          callbacks=[tensorboard_callback, checkpoint_callback],
          initial_epoch=last_epoch)

30 epochs composed of 646 batches (steps) of 32 images.
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 646 steps, validate for 38 steps
Epoch 14/43

Epoch 00014: loss did not improve from 0.18806
Epoch 15/43

Epoch 00015: loss did not improve from 0.18806
Epoch 16/43

Epoch 00016: loss did not improve from 0.18806
Epoch 17/43

Epoch 00017: loss did not improve from 0.18806
Epoch 18/43

Epoch 00018: loss did not improve from 0.18806
Epoch 19/43

Epoch 00019: loss did not improve from 0.18806
Epoch 20/43

Epoch 00020: loss did not improve from 0.18806
Epoch 21/43

Epoch 00021: loss did not improve from 0.18806
Epoch 22/43


Exception in thread Thread-158:
Traceback (most recent call last):
  File "/usr/lib/python3.5/threading.py", line 914, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.5/threading.py", line 862, in run
    self._target(*self._args, **self._kwargs)
  File "/mnt/nvme-storage/venv/tf2.1/lib/python3.5/site-packages/tensorflow_core/python/keras/utils/data_utils.py", line 844, in _run
    with closing(self.executor_fn(_SHARED_SEQUENCES)) as executor:
  File "/mnt/nvme-storage/venv/tf2.1/lib/python3.5/site-packages/tensorflow_core/python/keras/utils/data_utils.py", line 823, in pool_fn
    initargs=(seqs, None, get_worker_id_queue()))
  File "/usr/lib/python3.5/multiprocessing/context.py", line 118, in Pool
    context=self.get_context())
  File "/usr/lib/python3.5/multiprocessing/pool.py", line 168, in __init__
    self._repopulate_pool()
  File "/usr/lib/python3.5/multiprocessing/pool.py", line 233, in _repopulate_pool
    w.start()
  File "/usr/lib/python3.5/multiprocessing/proc

## Saving model

In [None]:
save_model(model, SAVED_MODELS_ROOT)