In [68]:
import datetime
import math
import os
import random
import shutil
import sys
import time
from os import listdir, walk
from os.path import isfile, join
from sys import getsizeof

import cv2
import numpy as np
from IPython.display import display
from PIL import Image

import tensorflow as tf
from tensorflow.keras.optimizers import *
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.layers import Input
from tensorflow.keras.utils import Sequence
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D

from tensorflow.keras.preprocessing.image import ImageDataGenerator

#tf.config.optimizer.set_jit(True)
#tf.debugging.set_log_device_placement(True)
print(tf.config.experimental.list_physical_devices())

AUTOTUNE = tf.data.experimental.AUTOTUNE
STEPS_PER_EPOCH = 600
VALIDATION_STEPS = 30


%run ./variables.ipynb
%run ./utils.ipynb
%run ../utils/data_utils.ipynb

id_map = get_selected_taxons("../../selected_taxons.txt")
id_map_inv = get_selected_taxons("../../selected_taxons.txt", inv=True)
n_classes = len(list(id_map.keys()))

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:XLA_CPU:0', device_type='XLA_CPU'), PhysicalDevice(name='/physical_device:XLA_GPU:0', device_type='XLA_GPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


# Handle data

In [None]:
# Build the class-name list handed to flow_from_directory(classes=...):
# position i holds the id_map key whose value is i.
# The largest index is read straight from the values, so this cell does not
# need the `operator` module (which this notebook never imports).
size = max(id_map.values())
classes_array = [None]*(size+1)
for taxon, class_index in id_map.items():
    classes_array[class_index] = taxon
# Slot 0 is always labelled "DUST", replacing any id_map key mapped to 0.
classes_array[0]="DUST"

In [64]:
# Training-time generator: multiplies pixel values by 1/255 and applies random
# augmentation. zca_whitening is deliberately not enabled: Keras only applies
# it after datagen.fit(...) has computed the whitening statistics, and this
# notebook never calls fit, so the flag only produced warnings.
datagen = ImageDataGenerator(rescale=1./255.,
                         rotation_range=90,
                         brightness_range=[0.8,1.2],
                         horizontal_flip=True,
                         vertical_flip=True,
                         fill_mode='nearest',
                         width_shift_range=40,
                         height_shift_range=40,
                         zoom_range=0.2,
                         validation_split=0.2)

# Validation-time generator: same rescaling and the same validation_split, but
# no random augmentation, so validation metrics are measured on unmodified
# images. The split fraction must match the one above so that the "validation"
# subset stays the complement of the "training" subset.
val_datagen = ImageDataGenerator(rescale=1./255.,
                                 validation_split=0.2)

# Options shared by both directory iterators.
flow_options = dict(
    class_mode="categorical",
    color_mode="rgb",
    shuffle=True,
    classes=classes_array,
    batch_size=32,
    seed=27)

train_generator = datagen.flow_from_directory(
    DATASET_PATH,
    subset="training",
    **flow_options)
val_generator = val_datagen.flow_from_directory(
    DATASET_PATH,
    subset="validation",
    **flow_options)

Found 4990 images belonging to 230 classes.
Found 1154 images belonging to 230 classes.


In [None]:
# Preview exactly one training batch: print each sample's label index and
# render the image. A single next() replaces the counter-based loop, whose
# inner `for i` overwrote the outer counter `i` used for the exit test.
batch = next(train_generator)
images = batch[0]
labels = batch[1]
for image, label in zip(images, labels):
    print(np.argmax(label))
    # The generator multiplies pixels by 1/255; scale back for display.
    display(Image.fromarray((image*255).astype('uint8')))

# Model design

In [None]:
# Pretrained backbone: InceptionV3 with ImageNet weights and without its
# classification top, fed by an explicit 256x256 3-channel input tensor.
# Alternative backbone, kept for reference:
#base_model = Xception(include_top=False, weights='imagenet', input_shape=(256, 256, 3), pooling=None)
input_tensor = Input(shape=(256, 256, 3))
base_model = InceptionV3(
    include_top=False,
    weights='imagenet',
    input_tensor=input_tensor,
)

In [7]:
# Classification head for this specific task, stacked on the pretrained backbone.
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
# One output unit per entry of classes_array (the class list given to the data
# generators) rather than a hard-coded 230, so the head always matches the
# width of the one-hot labels.
out = Dense(len(classes_array), activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=out)

# Training

In [8]:
# Setting tensorboard
# Clear logs from earlier runs at the path held in the Python variable LOG_DIR
# (a shell `rm -rf LOG_DIR` would target a literal directory called "LOG_DIR").
# ignore_errors=True makes this a no-op when the path does not exist yet.
# NOTE(review): assumes LOG_DIR points at a dedicated log directory - confirm
# its value before running, since everything under it is deleted.
shutil.rmtree(LOG_DIR, ignore_errors=True)
# Each run logs under LOG_DIR followed by a timestamp.
log_dir = LOG_DIR + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

## New layers only

In [9]:
# Summary line computed from the shared configuration values
# (N_EPOCHS, train_size, BATCH_SIZE).
n_epochs_shown = int(0.1*N_EPOCHS)
n_batches_shown = int(train_size/BATCH_SIZE)-1
print(n_epochs_shown, "epochs composed of", n_batches_shown, "batches (steps) of", BATCH_SIZE, "images.")

2 epochs composed of 923 batches (steps) of 32 images.


In [10]:
n_epochs_train1 = 3

# Stage 1: freeze every layer of the pretrained backbone so that only the
# layers added on top of it are updated.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Epochs 0 .. n_epochs_train1-1, logged to TensorBoard.
model.fit(
    train_generator,
    validation_data=val_generator,
    steps_per_epoch=STEPS_PER_EPOCH,
    validation_steps=VALIDATION_STEPS,
    initial_epoch=0,
    epochs=n_epochs_train1,
    callbacks=[tensorboard_callback],
    use_multiprocessing=True,
)

Train for 924 steps, validate for 231 steps
Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7fe0654615f8>

In [None]:
# Repeat each dataset for the stage-1 epoch count, then group it into batches.
# NOTE(review): train_dataset / val_dataset (and train_size / val_size) are not
# created in this notebook's visible cells - presumably they come from a helper
# notebook loaded with %run; confirm.
train_train1 = train_dataset.repeat(n_epochs_train1).batch(BATCH_SIZE)
val_train1 = val_dataset.repeat(n_epochs_train1).batch(BATCH_SIZE)

# For each split, print the hand-computed batch count followed by the
# cardinality TensorFlow reports for the batched dataset.
for split_size, batched in ((train_size, train_train1), (val_size, val_train1)):
    print(n_epochs_train1*int(split_size/BATCH_SIZE))
    print(int(tf.data.experimental.cardinality(batched).numpy()))
val_size

## Training the last 2 inception blocks

In [None]:
# Print every backbone layer with its index, to pick the freeze boundary used
# for fine-tuning.
layer_names = [layer.name for layer in base_model.layers]
for position, layer_name in enumerate(layer_names):
    print(position, layer_name)

In [None]:
n_epochs_train2 = n_epochs_train1+10

# Stage 2: fine-tune the top 2 inception blocks, i.e. freeze the first 249
# layers of the model and unfreeze the rest.
for layer in model.layers[:249]:
    layer.trainable = False
for layer in model.layers[249:]:
    layer.trainable = True

# The model must be recompiled for the new trainable flags to take effect.
# As the stage intends, fine-tuning uses SGD with a low learning rate instead
# of the default "adam" settings, so the pretrained weights are only adjusted
# in small steps. SGD is in scope through the tensorflow.keras.optimizers
# import at the top of the notebook.
model.compile(optimizer=SGD(learning_rate=0.0001, momentum=0.9),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train again, continuing the epoch numbering where stage 1 stopped; this time
# the top 2 inception blocks are fine-tuned alongside the top Dense layers.
model.fit(train_generator,
          epochs=n_epochs_train2,
          steps_per_epoch=STEPS_PER_EPOCH,
          use_multiprocessing=True,
          validation_data=val_generator,
          validation_steps=VALIDATION_STEPS,
          callbacks=[tensorboard_callback],
          initial_epoch=n_epochs_train1)

## Training full model

In [12]:
n_epochs_train3 = n_epochs_train2+20

# NOTE(review): these two datasets are not passed to model.fit below (it reads
# from train_generator / val_generator), and train_dataset / val_dataset are
# not created in the visible cells - confirm whether they are still needed.
train_train3 = train_dataset.repeat(n_epochs_train3).batch(BATCH_SIZE)
val_train3 = val_dataset.repeat(n_epochs_train3).batch(BATCH_SIZE)

# Stage 3: unfreeze every layer so the whole network is trained.
for layer in model.layers:
    layer.trainable = True

# The model must be recompiled for the new trainable flags to take effect.
# SGD with a low learning rate keeps the updates to the pretrained weights
# small; SGD is in scope through the tensorflow.keras.optimizers import at the
# top of the notebook.
model.compile(optimizer=SGD(learning_rate=0.0001, momentum=0.9),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the full model, continuing the epoch numbering where stage 2 stopped.
model.fit(train_generator,
          epochs=n_epochs_train3,
          steps_per_epoch=STEPS_PER_EPOCH,
          use_multiprocessing=True,
          validation_data=val_generator,
          validation_steps=VALIDATION_STEPS,
          callbacks=[tensorboard_callback],
          initial_epoch=n_epochs_train2)

Train for 923 steps, validate for 230 steps
Epoch 14/33
Epoch 15/33
Epoch 16/33
Epoch 17/33
Epoch 18/33
Epoch 19/33
Epoch 20/33
Epoch 21/33
Epoch 22/33
Epoch 23/33
Epoch 24/33
Epoch 25/33
Epoch 26/33
Epoch 27/33
Epoch 28/33
Epoch 29/33
Epoch 30/33
Epoch 31/33
Epoch 32/33
Epoch 33/33


<tensorflow.python.keras.callbacks.History at 0x7fe064f75550>

## Saving model

In [13]:
# Persist the trained model as an architecture JSON file plus an HDF5 weights
# file. Two copies are written: one under ./saved_models/ and one in the
# current working directory.

# Make sure the target folder exists before anything is written into it.
os.makedirs("./saved_models", exist_ok=True)

# serialize model to JSON (once; the same text is written to both locations)
model_json = model.to_json()
save_targets = (
    ("./saved_models/model.json", "./saved_models/model.h5"),
    ("model.json", "model.h5"),
)
for json_path, weights_path in save_targets:
    with open(json_path, "w") as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    model.save_weights(weights_path)
print("Saved model to disk")

# Analytics

In [None]:
# Run the trained model over test_dataset and keep its raw outputs.
# NOTE(review): test_dataset is not created in this notebook's visible cells -
# confirm where it comes from.
preds = model.predict(
    x=test_dataset,
    use_multiprocessing=True,
)

In [None]:
# Index of the largest output for every prediction in preds.
labels = list(map(np.argmax, preds))

In [None]:
# A tenth of the configured epoch count, truncated to an integer.
int(N_EPOCHS * 0.1)

In [None]:
# Show only the first predicted class indices; displaying the whole list
# floods the cell output when there are many predictions.
labels[:20]