In [1]:
import datetime
import math
import random
import shutil
import sys
import time
from os import listdir, walk
from os.path import isfile, join
from sys import getsizeof

import cv2
import numpy as np
from IPython.display import display
from PIL import Image

import tensorflow as tf
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.layers import Input
from tensorflow.keras.utils import Sequence
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
# Star import kept last so it cannot be shadowed by (or shadow) the explicit imports above.
from tensorflow.keras.optimizers import *
# Optional TensorFlow debugging switches, left disabled.
#tf.config.optimizer.set_jit(True)
#tf.debugging.set_log_device_placement(True)
# Show which devices TensorFlow can see.
print(tf.config.experimental.list_physical_devices())

# Let tf.data pick the parallelism level for dataset transformations.
AUTOTUNE = tf.data.experimental.AUTOTUNE
BATCH_SIZE = 32  # images per batch
N_EPOCHS = 20  # epoch budget; each training phase below uses a fraction of it
# Fractions of the dataset assigned to the train / test / validation splits.
TRAIN_P = 0.70
TEST_P = 0.15
VAL_P = 0.15

# Execute the companion notebooks inside this kernel.
# NOTE(review): they presumably define LOG_DIR, get_selected_taxons() and
# get_dataset(), which are used in this notebook but not defined in it — confirm.
%run ./variables.ipynb
%run ./utils.ipynb

# NOTE(review): id_map looks like a mapping with one key per selected taxon, which
# would make n_classes the number of target classes — confirm against utils.ipynb.
id_map = get_selected_taxons()
n_classes = len(list(id_map.keys()))

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:XLA_CPU:0', device_type='XLA_CPU'), PhysicalDevice(name='/physical_device:XLA_GPU:0', device_type='XLA_GPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


# Handle data

In [2]:
# Fetch the dataset as two parallel sequences.
# NOTE(review): get_dataset() is defined outside this notebook. `files` is later read
# as PNG paths and `labels` is one-hot encoded, so they look like image file paths
# and integer class ids — confirm.
files, labels = get_dataset()
NB_SAMPLES = len(files)  # total number of samples

'185/185'

In [3]:
def cb_load_image(image_path, label):
    """Read one PNG image from disk and pair it with its label.

    Args:
        image_path: Path of the PNG file, handed to `tf.io.read_file`.
        label: Label belonging to the image; passed through unchanged.

    Returns:
        An `(image, label)` tuple where `image` is the 3-channel decoded picture
        converted to `tf.float32`.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_png(raw_bytes, channels=3)
    # convert_image_dtype rescales integer pixel values into the 0-1 float range.
    scaled = tf.image.convert_image_dtype(decoded, tf.float32)
    return scaled, label

# Build the (image, one-hot label) dataset.
# The one-hot width is inferred from the largest label value (num_classes=None).
# NOTE(review): if the highest class id is absent from `labels`, the encoding will be
# narrower than the model's output layer — consider passing the class count explicitly.
one_hot_labels = tf.keras.utils.to_categorical(labels, num_classes=None, dtype='float32')
full_dataset = tf.data.Dataset.from_tensor_slices((files, one_hot_labels))

# Shuffle exactly once, with a fixed seed. By default shuffle() reshuffles every time
# the dataset is iterated, so the take()/skip() based train/val/test splits built from
# this dataset would pick different samples on every pass (and on every repeat()),
# leaking validation and test images into training.
SHUFFLE_SEED = 42
full_dataset = full_dataset.shuffle(len(files), seed=SHUFFLE_SEED, reshuffle_each_iteration=False)

# Decode the images lazily and in parallel; the paths are shuffled before decoding so
# the shuffle buffer only ever holds file names, not pixels.
full_dataset = full_dataset.map(cb_load_image, num_parallel_calls=AUTOTUNE)
# Batching is applied later, separately for each training phase.

In [4]:
# Disabled sanity check: take one sample and show its shape, its label and the image.
#for image, label in full_dataset.take(1):
#    print(image.numpy().shape)
#    print(label.numpy())
#    display(Image.fromarray(np.uint8(image.numpy()*255)))

In [5]:
# Split sizes, derived from the configured fractions (rounded down).
DATASET_SIZE = len(files)
train_size = int(TRAIN_P * DATASET_SIZE)
val_size = int(VAL_P * DATASET_SIZE)
test_size = int(TEST_P * DATASET_SIZE)

# Carve three consecutive, non-overlapping slices out of the dataset:
#   [0, train_size)                                        -> train
#   [train_size, train_size + val_size)                    -> validation
#   [train_size + val_size, train_size + val_size + test_size) -> test
# Taking validation first and skipping past it for test keeps the two disjoint even
# when VAL_P and TEST_P differ, and makes each slice exactly as long as its *_size.
# The few samples lost to rounding are left unused.
train_dataset = full_dataset.take(train_size)
tmp_dataset = full_dataset.skip(train_size)
val_dataset = tmp_dataset.take(val_size)
test_dataset = tmp_dataset.skip(val_size).take(test_size)

# Model design

In [6]:
# Pretrained backbone: InceptionV3 with ImageNet weights and without its
# classification top, fed through an explicit 256x256 RGB input tensor.
# Alternative backbone kept for reference:
#   Xception(include_top=False, weights='imagenet', input_shape=(256, 256, 3), pooling=None)
input_tensor = Input(shape=(256, 256, 3))
base_model = InceptionV3(
    include_top=False,
    weights='imagenet',
    input_tensor=input_tensor,
)

In [7]:
# Attach a task-specific classification head to the backbone:
# global average pooling -> 1024-unit ReLU layer -> 230-way softmax.
# NOTE(review): 230 is hard-coded; it presumably equals n_classes from the
# configuration cell — confirm and consider using that value instead.
pooled = GlobalAveragePooling2D()(base_model.output)
hidden = Dense(1024, activation='relu')(pooled)
out = Dense(230, activation='softmax')(hidden)
model = Model(inputs=base_model.input, outputs=out)

# Training

In [8]:
# TensorBoard setup.
# Remove logs left by previous runs. A plain `!rm -rf LOG_DIR` hands the literal text
# "LOG_DIR" to the shell (IPython only substitutes `{var}` / `$var`), so the Python
# variable is passed to shutil instead. ignore_errors=True makes a missing directory
# a no-op, like `rm -rf`.
shutil.rmtree(LOG_DIR, ignore_errors=True)

# Each run logs into its own timestamped location under LOG_DIR.
log_dir = LOG_DIR + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

## New layers only

In [9]:
# Summarise the first training phase: epoch count, steps per epoch and batch size.
print(
    int(0.1 * N_EPOCHS), "epochs composed of",
    (train_size // BATCH_SIZE - 1), "batches (steps) of",
    BATCH_SIZE, "images.",
)

2 epochs composed of 807 batches (steps) of 32 images.


In [10]:
# Phase 1: train only the newly added head while the whole backbone stays frozen.
n_epochs_train1 = int(0.1*N_EPOCHS)

# Repeat once per epoch, then batch; prefetch lets image decoding overlap with training.
train_train1 = train_dataset.repeat(n_epochs_train1).batch(BATCH_SIZE).prefetch(AUTOTUNE)
val_train1 = val_dataset.repeat(n_epochs_train1).batch(BATCH_SIZE).prefetch(AUTOTUNE)

# Freeze every backbone layer; compile afterwards so the change takes effect.
for layer in base_model.layers:
    layer.trainable = False

# Labels are one-hot encoded, hence categorical (not sparse) cross-entropy.
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

# `use_multiprocessing` is omitted on purpose: it only applies to generator /
# keras.utils.Sequence inputs and has no effect on a tf.data pipeline.
model.fit(train_train1,
          epochs=n_epochs_train1,
          steps_per_epoch=int(train_size/BATCH_SIZE)-1,
          validation_data=val_train1,
          validation_steps=int(val_size/BATCH_SIZE)-1,
          callbacks=[tensorboard_callback])

Train for 807 steps, validate for 172 steps
Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x7fa72e23c518>

## Training network

In [None]:
# Print the index and name of every backbone layer, to help pick the layer index
# from which fine-tuning should start.
for index, base_layer in enumerate(base_model.layers):
    print(index, base_layer.name)

In [13]:
# Phase 2: fine-tune the top of the backbone together with the new head.
n_epochs_train2 = int(0.5*N_EPOCHS)

# Repeat once per epoch, then batch; prefetch lets image decoding overlap with training.
train_train2 = train_dataset.repeat(n_epochs_train2).batch(BATCH_SIZE).prefetch(AUTOTUNE)
val_train2 = val_dataset.repeat(n_epochs_train2).batch(BATCH_SIZE).prefetch(AUTOTUNE)

# we chose to train the top 2 inception blocks, i.e. we freeze
# the first 249 layers and unfreeze the rest.
# (The first loop must set trainable to False; setting it to True would unfreeze
# the entire network and contradict the stated intent.)
for layer in model.layers[:249]:
    layer.trainable = False
for layer in model.layers[249:]:
    layer.trainable = True

# we need to recompile the model for these modifications to take effect
# we use SGD with a low learning rate (`learning_rate` replaces the deprecated `lr`)
model.compile(optimizer=SGD(learning_rate=0.0001, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])

# we train our model again (this time fine-tuning the top 2 inception blocks
# alongside the top Dense layers).
# `use_multiprocessing` is omitted on purpose: it only applies to generator /
# keras.utils.Sequence inputs and has no effect on a tf.data pipeline.
model.fit(train_train2,
          epochs=n_epochs_train2,
          steps_per_epoch=int(train_size/BATCH_SIZE)-1,
          validation_data=val_train2,
          validation_steps=int(val_size/BATCH_SIZE)-1,
          callbacks=[tensorboard_callback])

Train for 807 steps, validate for 172 steps
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fa72cab0358>

In [15]:
# Persist the trained model as two files: the weights (HDF5) and the architecture (JSON).
# save_weights() returns nothing, so its result is not bound to a name.
model.save_weights("./saved_models/model.h5")

model_json = model.to_json()
with open("./saved_models/model.json", "w") as json_file:
    json_file.write(model_json)

# Analytics

In [None]:
# Run inference on the held-out test split.
# test_dataset yields single (image, label) elements, so it has to be batched before
# being handed to predict(); otherwise each image would be treated as a batch of rows.
# predict() only uses the image part of each element.
# `use_multiprocessing` is omitted: it only applies to generator / Sequence inputs.
preds = model.predict(test_dataset.batch(BATCH_SIZE))

In [None]:
# Predicted class index per sample: the position of the highest score in each row.
# NOTE(review): this rebinds `labels`, which earlier held the values returned by
# get_dataset(); re-running the dataset cell afterwards would pick up these
# predictions instead of the ground truth.
labels = list(map(np.argmax, preds))

In [None]:
# Show the epoch count used by the first training phase (10% of N_EPOCHS, rounded down).
int(0.1*N_EPOCHS)

In [None]:
# Display the current value of `labels` (the predicted class indices once the argmax cell has run).
labels