# Preprocessing

* Read the picture files
* Decode JPEG content to RGB pixels
* Convert this into floating tensors
* Rescale the pixel values (between 0 and 255) to the [0, 1] interval.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import pandas as pd
#import tensorflow_datasets as tfds

from tensorflow.keras import datasets, layers, models
from tensorflow.keras.models import Model

In [None]:
#Split the images into Train, Test folders
import shutil, os

# Read the training metadata. The file is parsed as ';'-separated and the three
# columns are named explicitly.
# NOTE(review): the same CSV is read again further down with pandas' defaults
# (',' separator, first row as header) - confirm which layout the file has.
df = pd.read_csv('../data/train.csv', sep=";", names=['image_id','image_location','turtle_id'])

# Rows ordered by turtle id, so the class list below comes out in sorted order.
labels = df.sort_values('turtle_id')

# Unique turtle ids as a plain Python list. The frame has no 'Class' column
# (see `names=` above), so the class label must be read from 'turtle_id'.
class_names = list(labels['turtle_id'].unique())



In [None]:
# Image folders, one per data split.
train_dir, val_dir, test_dir = (
    "../images_test/train",
    "../images_test/validation",
    "../images_test/test",
)

# Metadata tables for the training images and for the test images.
train = pd.read_csv('../data/train.csv')
df_test = pd.read_csv('../data/test.csv')

# Simple Model

In [None]:
def linear_model(input_shape=(32, 32, 3), num_classes=None):
    """Build a small convolutional network.

    Despite its name, the network is a stack of three 3x3 ReLU convolutions
    (32, 64 and 64 filters) with 2x2 max pooling after the first two.

    Args:
        input_shape: ``(height, width, channels)`` of the input images. The
            default keeps the original 32x32 RGB input; pass for example
            ``(150, 150, 3)`` to match generators created with
            ``target_size=(150, 150)``.
        num_classes: When given, a ``Flatten`` layer and a ``Dense`` layer
            with ``num_classes`` linear outputs (logits) are appended, which
            is the output a ``from_logits=True`` cross-entropy loss expects.
            When ``None`` (default) the model ends with the last convolution,
            exactly as before.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    model = tf.keras.models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))

    if num_classes is not None:
        # Optional classification head: one linear output (logit) per class.
        model.add(layers.Flatten())
        model.add(layers.Dense(num_classes))

    return model


model = linear_model()

In [None]:
# Print the layer-by-layer overview of the model built above.
model.summary()

# Keras Preprocessing + Augmentation using ImageDataGenerator



- `rescale` is a factor by which the image data is multiplied (Keras applies it after all other transformations). Our original images consist of RGB coefficients in the 0-255 range, but such values would be too high for our models to process (given a typical learning rate), so we target values between 0 and 1 instead by scaling with a factor of 1/255.


In [None]:
# Showcase only: ImageDataGenerator with its keyword arguments spelled out, to
# show which preprocessing / augmentation options exist. The instance is not
# assigned to a name, so nothing later in the notebook uses it.

tf.keras.preprocessing.image.ImageDataGenerator(
    featurewise_center=False, samplewise_center=False,
    featurewise_std_normalization=False, samplewise_std_normalization=False,
    zca_whitening=False, zca_epsilon=1e-06, rotation_range=0, width_shift_range=0.0,
    height_shift_range=0.0, brightness_range=None, shear_range=0.0, zoom_range=0.0,
    channel_shift_range=0.0, fill_mode='nearest', cval=0.0,
    horizontal_flip=False, vertical_flip=False, rescale=None,
    preprocessing_function=None, data_format=None, validation_split=0.0, dtype=None
)

In [None]:
# we could maybe use this as follows: 

from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

# flow_from_directory : Takes the path to a directory & generates batches of augmented data.
# use "rescale" to scale array of original image pixel values to be between [0,1] and specify the parameter rescale=1./255.

def preprocess(augment_randomly=True):
    """Create the image data generators for training and for evaluation.

    Args:
        augment_randomly: If true (default), the training generator applies
            random augmentation (rotation, width/height shifts, shear, zoom,
            horizontal flip) in addition to rescaling. If false, the training
            generator only rescales.

    Returns:
        A ``(train_datagen, test_datagen)`` tuple. ``test_datagen`` never
        augments; both generators rescale pixel values by 1/255.

    Note:
        The flag used to be inverted (``False`` switched augmentation on and
        ``True`` switched it off). The branch is fixed and the default is now
        ``True``, so a plain ``preprocess()`` call still returns an
        augmenting training generator.
    """
    # Validation / test images are only rescaled, never augmented.
    test_datagen = ImageDataGenerator(rescale=1./255)

    if augment_randomly:
        train_datagen = ImageDataGenerator(
                rotation_range=40,
                width_shift_range=0.2,
                height_shift_range=0.2,
                rescale=1./255,
                shear_range=0.2,
                zoom_range=0.2,
                horizontal_flip=True,
                fill_mode='nearest')
    else:
        train_datagen = ImageDataGenerator(rescale=1./255)

    return train_datagen, test_datagen

# These generators read the pictures found in the class subfolders of
# '../images_test/train' (and of the validation / test folders) and
# indefinitely yield batches of (augmented) image data.

def generate_augmented_image(train_datagen, test_datagen, augment_randomly=False,
                             target_size=(150, 150), batch_size=2,
                             class_mode='categorical'):
    """Create the directory iterators for the train, validation and test split.

    Args:
        train_datagen: Data generator used for ``'../images_test/train'``.
        test_datagen: Data generator used for ``'../images_test/validation'``
            and ``'../images_test/test'``.
        augment_randomly: Accepted for backward compatibility and ignored.
            Whether images are augmented is decided by how ``train_datagen``
            was configured, not here. Previously any value other than
            ``False`` left the generators undefined and the ``return``
            statement raised ``UnboundLocalError``.
        target_size: ``(height, width)`` every image is resized to.
        batch_size: Number of images per yielded batch.
        class_mode: Kind of targets passed to ``flow_from_directory``. The
            default ``'categorical'`` yields one-hot class labels, matching a
            categorical cross-entropy loss. The former hard-coded ``'input'``
            yields the images themselves as targets; pass it explicitly to
            get the old behaviour.

    Returns:
        A ``(train_generator, validation_generator, test_generator)`` tuple.
    """
    def _flow(datagen, directory):
        # All three splits share the same size / batch / target settings.
        return datagen.flow_from_directory(
            directory,
            target_size=target_size,
            batch_size=batch_size,
            class_mode=class_mode)

    train_generator = _flow(train_datagen, '../images_test/train')
    validation_generator = _flow(test_datagen, '../images_test/validation')
    test_generator = _flow(test_datagen, '../images_test/test')

    return train_generator, validation_generator, test_generator
        

# Simple Model (Training)

In [None]:
def train_and_evaluate(model,batch_size=2):
    """Compile *model*, fit it on the image generators and save its weights.

    Args:
        model: Keras model to train; it is compiled and fitted in place.
        batch_size: Only used to derive ``steps_per_epoch`` as
            ``100 // batch_size``; it is not forwarded to the generators.

    Returns:
        The same ``model`` object, after 10 epochs of fitting.

    Despite the name, no evaluation on the test split happens here: the test
    generator is created but not used. The weights are written to
    ``'simple_model.h5'``.
    """
    # Non-sparse categorical cross-entropy: it expects one-hot targets, and
    # from_logits=True means the model is expected to emit raw logits.
    loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
    model.compile(optimizer="adam", loss=loss_fn, metrics=['accuracy'])

    # TensorBoard logging (tf.keras.callbacks.TensorBoard) is not wired in.
    datagens = preprocess()
    generators = generate_augmented_image(*datagens, augment_randomly=False)
    train_batches, validation_batches, _unused_test_batches = generators

    steps = 100 // batch_size
    model.fit(
        train_batches,
        validation_data=validation_batches,
        steps_per_epoch=steps,
        epochs=10,
    )

    # Persist the weights right after training.
    model.save_weights('simple_model.h5')
    return model

In [None]:
# Build a fresh, untrained model and run the training routine on it.
model = linear_model()

# train_and_evaluate returns the model object it was given, so `trained_model`
# and `model` refer to the same (now fitted) object.
trained_model = train_and_evaluate(model)

Let us use Tensorboard to monitor our results:

In [None]:
#%tensorboard --logdir logs/fit

In [None]:
# Write a testing function.
def test(model, test_generator=None):
    """Evaluate *model* on the test images and return the result.

    Args:
        model: A compiled Keras model.
        test_generator: Batches to evaluate on. When omitted, the test
            generator is built with ``preprocess()`` and
            ``generate_augmented_image()``. The notebook defines no
            module-level ``test_generator``, so the former body raised
            ``NameError``.

    Returns:
        Whatever ``model.evaluate`` returns for the test batches (previously
        the result was discarded).
    """
    if test_generator is None:
        train_datagen, test_datagen = preprocess()
        # Only the last of the three returned generators is needed here.
        *_, test_generator = generate_augmented_image(
            train_datagen, test_datagen, augment_randomly=False)

    return model.evaluate(test_generator)

In [None]:
# Evaluate the model (fitted in place by train_and_evaluate) with the testing
# function defined above.
test(model)