# Preprocessing

* Read the picture files
* Decode JPEG content to RGB pixels
* Convert these into floating-point tensors
* Rescale pixel values (between 0 and 255) to the [0, 1] interval.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import pandas as pd
import datetime
# Load the TensorBoard notebook extension
%load_ext tensorboard 
#import tensorflow_datasets as tfds

from tensorflow.keras import datasets, layers, models
from tensorflow.keras.models import Model

In [None]:
#Split the images into Train, Test folders
import shutil, os

# Read the image/label table from the csv.
df = pd.read_csv('train.csv')

# Order the rows by turtle id. (An earlier sort by 'image_id' was overwritten
# on the very next line, so it had no effect and has been dropped.)
labels = df.sort_values('turtle_id')

# Unique image ids and turtle ids, in order of first appearance in `labels`.
image_names = list(labels.image_id.unique())
label_names = list(labels.turtle_id.unique())

# Number of distinct turtle ids, used as the size of the model output layer.
NCLASSES = len(label_names)

In [None]:
# Directory against which the image file names are resolved.
train_dir = "../images/"

# NOTE(review): `test` is loaded from the same 'train.csv' as `train` —
# confirm whether a separate test csv was intended.
train = pd.read_csv('train.csv')
test = pd.read_csv('train.csv')

# Turn every image id into a file name: trim whitespace, append ".JPG".
train["image_id"] = train["image_id"].map(lambda name: name.strip() + ".JPG")



In [None]:
# Display the last rows of `train` (image ids now carry the ".JPG" suffix).
train.tail()

# Simple Model

In [None]:
def linear_model():
    """Build an uncompiled linear classifier over 150x150 RGB images.

    Returns:
        A `tf.keras` Sequential model made of an input layer (named
        'image_id'), a flatten layer and one dense layer with NCLASSES units.
        The dense layer has no activation, so the model outputs raw logits.
    """
    keras_layers = tf.keras.layers
    return tf.keras.models.Sequential([
        keras_layers.InputLayer(input_shape=[150, 150, 3], name='image_id'),
        keras_layers.Flatten(data_format="channels_last"),
        # No activation function: this keeps the model purely linear.
        keras_layers.Dense(units=NCLASSES, activation=None),
    ])

In [None]:
# Summarize a freshly built linear model. The global `model` is only assigned
# in a later cell, so referencing it here raised a NameError when the
# notebook was run from top to bottom.
linear_model().summary()

# Keras Preprocessing + Augmentation using ImageDataGenerator



- `rescale` is a value by which we multiply the data before any other processing. Our original images consist of RGB coefficients in the range 0-255, but such values would be too high for our models to process (given a typical learning rate), so we target values between 0 and 1 instead by scaling with a factor of 1/255.


In [None]:
# we could maybe use this as follows: 

from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

# flow_from_directory : Takes the path to a directory & generates batches of augmented data.
# use "rescale" to scale array of original image pixel values to be between [0,1] and specify the parameter rescale=1./255.

def preprocess(augment_randomly=False):
    """Create the image data generators for training and for testing.

    Both generators rescale pixel values by 1/255. The test generator never
    does anything else. The training generator additionally applies random
    rotation, shifts, shear, zoom and horizontal flips when
    `augment_randomly` equals False (the default); for any other value it
    only rescales.

    NOTE(review): the flag reads inverted relative to its name (False turns
    augmentation ON). The behaviour is kept as-is because existing callers
    rely on the default producing an augmenting generator.

    Args:
        augment_randomly: see above.

    Returns:
        Tuple `(train_datagen, test_datagen)` of ImageDataGenerator objects.
    """
    pixel_scale = 1./255
    test_datagen = ImageDataGenerator(rescale=pixel_scale)

    # Equality test (not truthiness) kept on purpose so that non-bool
    # arguments are routed exactly as before.
    if augment_randomly != False:  # noqa: E712
        return ImageDataGenerator(rescale=pixel_scale), test_datagen

    augmentation_options = dict(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        rescale=pixel_scale,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest',
    )
    train_datagen = ImageDataGenerator(**augmentation_options)
    return train_datagen, test_datagen

# this builds generators that read the pictures listed in a dataframe
# (file names are resolved against 'directory') and indefinitely generate
# batches of image data
#x_col value : which will be the name of column(in dataframe) having file names
#y_col value : which will be the name of column(in dataframe) having class/label

def generate_augmented_image(train_datagen, test_datagen, augment_randomly=False):
    """Build the training and validation batch generators from `train`.

    Rows [0, 1700) of the module-level `train` dataframe feed the training
    generator and rows [1700, 2145) feed the validation generator. File names
    come from the "image_id" column (resolved against `train_dir`) and labels
    from the "turtle_id" column. Images are resized to 150x150 and delivered
    in batches of 32 with one-hot ('categorical') labels.

    Five training batches are drawn up front and the first image of each is
    plotted as a visual sanity check.

    Args:
        train_datagen: ImageDataGenerator applied to the training rows.
        test_datagen: ImageDataGenerator applied to the validation rows, so
            that validation images are not run through the training
            generator's transformations.
        augment_randomly: accepted for backward compatibility only; it no
            longer affects the result. Previously any value other than False
            made the function fall through and return None, which broke
            callers that unpack two generators.

    Returns:
        Tuple `(train_generator, validation_generator)`.
    """
    # Pin one class list for both generators. Left to itself, each generator
    # infers classes from its own slice, so the one-hot width and the class
    # order could differ between the two slices.
    class_names = sorted(train["turtle_id"].unique())

    shared_options = dict(
        directory=train_dir,
        x_col="image_id",
        y_col="turtle_id",
        classes=class_names,
        target_size=(150, 150),
        batch_size=32,
        class_mode='categorical',
        # To save the generated images, also pass:
        # save_to_dir="output/", save_prefix="", save_format='png'
    )

    train_generator = train_datagen.flow_from_dataframe(
        dataframe=train[0:1700], **shared_options)

    # Preview: show the first image of five consecutive training batches.
    # The builtin next() is used instead of the iterator's .next() method.
    for _ in range(5):
        img, _labels = next(train_generator)
        print(img.shape)   # (batch, 150, 150, 3)
        plt.imshow(img[0])
        plt.show()

    # Start at row 1700: the previous slice [1701:2145] silently dropped it.
    validation_generator = test_datagen.flow_from_dataframe(
        dataframe=train[1700:2145], **shared_options)

    return train_generator, validation_generator
        

# Simple Model (Training)

In [None]:
def train_and_evaluate(model,batch_size=32):
    """Compile `model`, fit it for 10 epochs with TensorBoard logging.

    The data generators are created inside this function via `preprocess()`
    and `generate_augmented_image(...)`. Evaluation happens only through the
    validation data passed to `fit`; weights are not saved.

    Args:
        model: an uncompiled Keras model whose outputs are logits.
        batch_size: only used to derive `steps_per_epoch` (1000 // batch_size);
            it is not forwarded to the data generators.

    Returns:
        The same `model` object, after fitting.
    """
    # The generators are created with class_mode='categorical' (one-hot
    # labels) and the model emits logits, hence the non-sparse categorical
    # cross-entropy with from_logits=True.
    logits_loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
    model.compile(optimizer="adam", loss=logits_loss, metrics=['accuracy'])

    # One TensorBoard run directory per call, keyed by the start time.
    run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    log_dir = "logs/fit/" + run_stamp
    fit_callbacks = [
        tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1),
    ]

    datagens = preprocess()
    train_generator, validation_generator = generate_augmented_image(
        *datagens, augment_randomly=False)

    # NOTE(review): the step count assumes 1000 samples per epoch, which is a
    # hard-coded figure — confirm it matches the intended training set size.
    model.fit(
        train_generator,
        validation_data=validation_generator,
        steps_per_epoch=1000 // batch_size,
        epochs=10,
        callbacks=fit_callbacks,
    )

    return model

In [None]:
# Build and train our model using the previously defined functions.
model = linear_model()

# train_and_evaluate returns the model object it was given, so
# `trained_model` and `model` refer to the same fitted model.
trained_model = train_and_evaluate(model)

Let us use Tensorboard to monitor our results:

In [None]:
# Open TensorBoard inline on the run directories written under logs/fit.
%tensorboard --logdir logs/fit

In [None]:
def test(model, generator=None):
    """Evaluate `model` on a data generator and return the result.

    Args:
        model: object exposing `evaluate(data)`, e.g. a compiled Keras model.
        generator: data to evaluate on. When omitted, the module-level
            `test_generator` is used; that name must have been defined
            beforehand, otherwise a NameError is raised.

    Returns:
        Whatever `model.evaluate` returns. Earlier versions discarded it.
    """
    if generator is None:
        generator = test_generator
    return model.evaluate(generator)

# Transfer Learning - not ready obviously- :D

In [None]:
import tensorflow_hub as hub
# TF Hub handle of the MobileNet V2 feature-vector module.
feature_extractor_url = "https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/2"
# Wrap the hub module as a Keras layer declared for 224x224 RGB inputs.
feature_extractor_layer = hub.KerasLayer(feature_extractor_url,input_shape=(224,224,3))

In [None]:
# Freeze the feature extractor so that training only updates the layers
# stacked on top of it.
feature_extractor_layer.trainable = False

In [None]:
def transfer_learning_model():
    """Build an uncompiled classifier on top of the shared hub feature layer.

    The module-level `feature_extractor_layer` is followed by a flatten
    layer, a 300-unit ReLU dense layer and a final dense layer with NCLASSES
    units and no activation (raw logits).

    Returns:
        A `tf.keras` Sequential model.
    """
    classifier_head = [
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(units=300, activation="relu"),
        # No activation on the output layer: the model returns logits.
        tf.keras.layers.Dense(units=NCLASSES, activation=None),
    ]
    return tf.keras.models.Sequential([feature_extractor_layer, *classifier_head])

In [None]:
# Build the transfer-learning model and train it with the same routine used
# for the linear model.
# NOTE(review): the feature extractor is declared with input_shape=(224,224,3)
# while the data generators resize images to (150, 150) — confirm the sizes
# agree before running this cell.
model = transfer_learning_model()
trained_model = train_and_evaluate(model)