In [1]:
from os import listdir, walk
from os.path import isfile, join
import numpy as np
from PIL import Image
import cv2
from IPython.display import display
import math, random
import time, datetime, sys, os, shutil
import operator

import tensorflow as tf
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.layers import Input
from tensorflow.keras.utils import Sequence
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.callbacks import CSVLogger
from tensorflow.keras.optimizers import *
from tensorflow.keras.callbacks import ModelCheckpoint

from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.model_selection import train_test_split
import pandas as pd

# Show every device TensorFlow can see, to confirm whether a GPU is available.
print(tf.config.experimental.list_physical_devices())

# Batch size used further down to compute the training/validation steps per epoch.
# NOTE(review): this is assigned BEFORE the %run lines below, so a variable with the
# same name defined in one of those notebooks would silently override it - confirm.
BATCH_SIZE = 32

# Execute the helper notebooks in this kernel's namespace.
# Presumably they provide the names used below and in later cells (SELECTED_TAXONS,
# SAVED_MODELS_ROOT, get_selected_taxons, check_dirs, get_dataset, ...); their
# definitions are not visible from this notebook - verify against those files.
%run ./variables.ipynb
%run ./utils.ipynb
%run ../utils/data_utils.ipynb

# Lookup tables for the selected taxons: the default mapping and its `inv=True` variant.
# NOTE(review): the exact key/value meaning depends on get_selected_taxons - confirm.
id_map = get_selected_taxons(SELECTED_TAXONS)
id_map_inv = get_selected_taxons(SELECTED_TAXONS, inv=True)
# Number of classes = number of keys in the taxon mapping.
n_classes = len(list(id_map.keys()))
# NOTE(review): presumably creates SAVED_MODELS_ROOT when it is missing - confirm in utils.
check_dirs(SAVED_MODELS_ROOT)

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:XLA_CPU:0', device_type='XLA_CPU'), PhysicalDevice(name='/physical_device:XLA_GPU:0', device_type='XLA_GPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


# Handle data

## Preparing pandas DataFrames

In [2]:
# Load the samples and their labels for the train and test splits.
# The third value returned by get_dataset is not needed here and is discarded.
X_train, y_train, _ = get_dataset(TRAIN_DATASET_PATH, ids=False)
X_test, y_test, _ = get_dataset(TEST_DATASET_PATH, ids=False)

# Sanity check: size and number of distinct labels of each split.
# (The first line used to be labelled "Test length:" although it reports the
# training split, which made the two output lines indistinguishable.)
print("Train length:", len(X_train), "- n classes:", len(np.unique(y_train)))
print("Test length:", len(X_test), "- n classes:", len(np.unique(y_test)))

# Balance dataset (currently disabled)
#X_train, y_train, max_samples = balance_dataset(X_train, y_train, max_samples=None)

Retrieving dataset from: /mnt/nvme-storage/pfauregi/training/thumbails/ADIAC_D38/dataset/train/


'38/38'

Retrieving dataset from: /mnt/nvme-storage/pfauregi/training/thumbails/ADIAC_D38/dataset/test/


'38/38'

Test length: 753 - n classes: 38
Test length: 84 - n classes: 38


In [3]:
# Pair every image path with its taxon label, one DataFrame per split.
# These are the frames consumed by `flow_from_dataframe` in the model cell
# (x_col='png_path', y_col='taxon_id').
data_train = {'png_path':  X_train, 'taxon_id': y_train}
data_test = {'png_path':  X_test, 'taxon_id': y_test}

df_train = pd.DataFrame(data_train, columns = ['png_path', 'taxon_id'])
df_test = pd.DataFrame(data_test, columns = ['png_path', 'taxon_id'])

# Shuffle the rows and rebuild a clean 0..n-1 index.
# NOTE(review): no random_state is given, so the row order differs on every run.
df_train = df_train.sample(frac=1).reset_index(drop=True)
df_test = df_test.sample(frac=1).reset_index(drop=True)

# Printing some infos
print(len(df_train), len(df_test))
# Only the last expression of a cell is rendered automatically, so a bare
# `df_train.head()` in the middle of the cell was silently discarded.
# Display it explicitly so both previews are shown.
display(df_train.head())
df_test.head()

753 84


Unnamed: 0,png_path,taxon_id
0,/mnt/nvme-storage/pfauregi/training/thumbails/...,Navicula_sp
1,/mnt/nvme-storage/pfauregi/training/thumbails/...,Encyonema_silesiacum
2,/mnt/nvme-storage/pfauregi/training/thumbails/...,Petroneis_humerosa
3,/mnt/nvme-storage/pfauregi/training/thumbails/...,Denticula_tenuis
4,/mnt/nvme-storage/pfauregi/training/thumbails/...,Cymbella_helvetica


# Model design

In [9]:
# ---------------------------------------------------------------------------
# Two-phase transfer learning with an ImageNet-pretrained Xception backbone:
#   1. "pre-training": backbone frozen, only the new classification head learns;
#   2. fine-tuning: every layer is unfrozen and training continues.
# ---------------------------------------------------------------------------
pretrain_epochs = 5     # epochs run with the backbone frozen
train_epochs = 100      # additional epochs run with all layers trainable
IMG_SIZE = (256, 256)   # (height, width) the images are resized to; must match the model input
WEIGHTS_PATH = "weights.hdf5"  # where the best checkpoint is written / reloaded from

# data generator
# Training images are rescaled to [0, 1] and randomly augmented.
train_datagen = ImageDataGenerator(rescale=1./255.,
                         rotation_range=20,
                         brightness_range=[0.8,1.2],
                         horizontal_flip=True,
                         vertical_flip=True,
                         fill_mode='nearest',
                         width_shift_range=10,
                         height_shift_range=10,
                         zoom_range=0.2,
                         data_format="channels_last")

# Validation images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1./255.,
                         data_format="channels_last")

# getting flows
# batch_size uses BATCH_SIZE (it was a hard-coded 32) so that it always agrees
# with the steps-per-epoch values computed from BATCH_SIZE below.
train_generator = train_datagen.flow_from_dataframe(
        dataframe=df_train,
        x_col='png_path',
        y_col='taxon_id',
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        #classes=classes_array,
        class_mode='categorical')
val_generator = test_datagen.flow_from_dataframe(
        dataframe=df_test,
        x_col='png_path',
        y_col='taxon_id',
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        # Reuse the training label order so class indices are identical in both
        # generators. `classes` is documented as a list; the dict keys are in
        # index order, so listing them preserves the mapping.
        classes=list(train_generator.class_indices),
        class_mode='categorical',
        # Metrics do not depend on sample order; a fixed order keeps later
        # predictions aligned with `val_generator.classes`.
        shuffle=False)

# Training: whole batches only (the generator loops over the data indefinitely).
train_spe = train_generator.samples // BATCH_SIZE
# Validation: round UP so the final partial batch is evaluated too. Floor division
# dropped up to BATCH_SIZE - 1 images (20 of 84 here) from every validation pass
# and yields 0 steps when the set is smaller than one batch.
val_spe = math.ceil(val_generator.samples / BATCH_SIZE)

#model
# Xception without its ImageNet head, followed by global pooling, one hidden
# dense layer and a softmax over the classes found by the training generator.
input_tensor = Input(shape=IMG_SIZE + (3,))
base_model = Xception(include_top=False, weights='imagenet', input_tensor=input_tensor, pooling=None)
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
out = Dense(len(train_generator.class_indices), activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=out)

#callbacks
# Keeps only the best model seen so far (ModelCheckpoint's default monitor).
checkpointer = ModelCheckpoint(filepath=WEIGHTS_PATH, verbose=1, save_best_only=True)

#optimizer
#optimizer = SGD(lr=0.1, decay=0.0001, momentum=0, nesterov=False)
optimizer = "adam"

# --- Phase 1: train the new head only -------------------------------------
print(pretrain_epochs, "epochs composed of", train_spe, "batches (steps) of", BATCH_SIZE, "images.")
for layer in base_model.layers:
    layer.trainable = False
# Changes to `trainable` only take effect after (re)compiling.
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
pretrain_history = model.fit(train_generator,
          epochs=pretrain_epochs,
          steps_per_epoch=train_spe,
          use_multiprocessing=False,
          validation_data=val_generator,
          validation_steps=val_spe,
          callbacks=[],
          verbose=0,
          initial_epoch=0)

# --- Phase 2: fine-tune the whole network ---------------------------------
print(train_epochs, "epochs composed of", train_spe, "batches (steps) of", BATCH_SIZE, "images.")
for layer in model.layers:
    layer.trainable = True
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
# In Keras, `epochs` is the index of the LAST epoch, not a count, when
# `initial_epoch` is set. Resuming at `pretrain_epochs` and stopping at
# `pretrain_epochs + train_epochs` runs exactly `train_epochs` fine-tuning epochs.
# The previous `initial_epoch=5, epochs=train_epochs` ran only 95 and broke as
# soon as `pretrain_epochs` was changed.
history = model.fit(train_generator,
          epochs=pretrain_epochs + train_epochs,
          steps_per_epoch=train_spe,
          use_multiprocessing=False,
          validation_data=val_generator,
          validation_steps=val_spe,
          callbacks=[checkpointer],
          verbose=1,
          initial_epoch=pretrain_epochs)

# Restore the best checkpoint and report its loss/accuracy on the full validation set.
model.load_weights(WEIGHTS_PATH)
model.evaluate(val_generator)

Found 753 validated image filenames belonging to 38 classes.
Found 84 validated image filenames belonging to 38 classes.
5 epochs composed of 23 batches (steps) of 32 images.
  ...
    to  
  ['...']
  ...
    to  
  ['...']


KeyboardInterrupt: 