# Preprocessing

* Read the picture files
* Decode JPEG content to RGB pixels
* Convert these into floating-point tensors
* Rescale pixel values (between 0 and 255) to the [0, 1] interval.

In [5]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
#import tensorflow_datasets as tfds

from tensorflow.keras import layers

# Simple Model

In [7]:
def linear_model():
    """Build a purely linear image classifier.

    The network takes an image of shape ``[HEIGHT, WIDTH, 3]``, flattens
    it and feeds it to a single dense layer with ``NCLASSES`` units and no
    activation, so the outputs are raw (unnormalized) scores.

    Returns:
        The uncompiled ``tf.keras`` Sequential model.
    """
    keras_layers = tf.keras.layers
    stack = [
        keras_layers.InputLayer(input_shape=[HEIGHT, WIDTH, 3], name='image'),
        keras_layers.Flatten(data_format="channels_last"),
        # Leaving the activation out is what keeps the model linear.
        keras_layers.Dense(units=NCLASSES, activation=None),
    ]
    return tf.keras.models.Sequential(stack)

# Keras Preprocessing + Augmentation using ImageDataGenerator



- `rescale` is a value by which the data is multiplied before any other processing. Our original images consist of RGB coefficients in the 0–255 range, but such values would be too high for our models to process (given a typical learning rate), so we scale by a factor of 1/255 to target values between 0 and 1 instead.


In [6]:
# Showcase of the options ImageDataGenerator accepts. Every keyword is spelled
# out explicitly; with the values below no centering, normalization,
# augmentation or rescaling is requested.
tf.keras.preprocessing.image.ImageDataGenerator(
    # Centering / normalization
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    zca_epsilon=1e-06,
    # Geometric and photometric augmentation
    rotation_range=0,
    width_shift_range=0.0,
    height_shift_range=0.0,
    brightness_range=None,
    shear_range=0.0,
    zoom_range=0.0,
    channel_shift_range=0.0,
    fill_mode='nearest',
    cval=0.0,
    horizontal_flip=False,
    vertical_flip=False,
    # Value scaling and custom hooks
    rescale=None,
    preprocessing_function=None,
    # Layout, validation split and dtype
    data_format=None,
    validation_split=0.0,
    dtype=None,
)

<keras.preprocessing.image.ImageDataGenerator at 0x29e40cd60>

In [None]:
# we could maybe use this as follows: 

from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

# flow_from_directory : Takes the path to a directory & generates batches of augmented data.
# use "rescale" to scale array of original image pixel values to be between [0,1] and specify the parameter rescale=1./255.

def read_and_preprocess(test_datagen=None, train_datagen=None, augment_randomly=False):
    """Create the training and test ``ImageDataGenerator`` objects.

    Both generators multiply pixel values by 1/255. When
    ``augment_randomly`` is true, the training generator additionally
    applies random rotation, width/height shifts, shear, zoom and
    horizontal flips; the test generator is never augmented.

    Args:
        test_datagen: Ignored. A fresh generator is always created; the
            parameter is kept only so existing call sites keep working.
        train_datagen: Ignored, for the same reason as ``test_datagen``.
        augment_randomly: Whether the training generator should apply
            random augmentation.

    Returns:
        A ``(train_datagen, test_datagen)`` tuple. Note that this order is
        the reverse of the parameter order.
    """
    # Evaluation images are only rescaled, regardless of the flag.
    test_datagen = ImageDataGenerator(rescale=1./255)

    if augment_randomly:
        train_datagen = ImageDataGenerator(
            rotation_range=40,
            width_shift_range=0.2,
            height_shift_range=0.2,
            rescale=1./255,
            shear_range=0.2,
            zoom_range=0.2,
            horizontal_flip=True,
            fill_mode='nearest',
        )
    else:
        train_datagen = ImageDataGenerator(rescale=1./255)

    return train_datagen, test_datagen

# This is a generator that reads the pictures found in
# subfolders of 'data/train' and indefinitely generates
# batches of augmented image data.

def generate_augmented_image(train_datagen, test_datagen, augment_randomly=False,
                             train_dir='data/train',
                             validation_dir='data/validation',
                             test_dir='data/validation',
                             target_size=(150, 150),
                             batch_size=32,
                             class_mode='binary'):
    """Create directory-backed batch generators for train/validation/test.

    Each generator is obtained by calling ``flow_from_directory`` on the
    supplied data generator with the same ``target_size``, ``batch_size``
    and ``class_mode``.

    Args:
        train_datagen: Object whose ``flow_from_directory`` yields the
            training batches.
        test_datagen: Object whose ``flow_from_directory`` yields the
            validation and test batches.
        augment_randomly: Unused here; whether images are augmented is
            decided by how ``train_datagen`` was configured. Kept so
            existing call sites keep working.
        train_dir: Directory holding the training images.
        validation_dir: Directory holding the validation images.
        test_dir: Directory holding the test images.
        target_size: Size passed to ``flow_from_directory`` for every
            generator.
        batch_size: Number of images per batch.
        class_mode: Label mode passed to ``flow_from_directory``.

    Returns:
        A ``(train_generator, validation_generator, test_generator)`` tuple.
    """
    # NOTE(review): test_dir defaults to the validation directory because that
    # is what the original code read from; this looks like a copy-paste slip.
    # Pass a dedicated test directory if one exists.
    flow_options = dict(
        target_size=target_size,
        batch_size=batch_size,
        class_mode=class_mode,
    )

    train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
    validation_generator = test_datagen.flow_from_directory(validation_dir, **flow_options)
    test_generator = test_datagen.flow_from_directory(test_dir, **flow_options)

    return train_generator, validation_generator, test_generator
        

# Simple Model (Training) -- needs Update--

In [9]:
def train_and_evaluate(model, batch_size, train_data=None, validation_data=None):
    """Compile ``model``, fit it with TensorBoard logging and save its weights.

    Args:
        model: The Keras model to train.
        batch_size: Batch size used when the datasets are loaded here
            (i.e. when ``train_data`` / ``validation_data`` are not given).
        train_data: Optional training data to pass to ``model.fit`` (for
            example a directory generator). When omitted, it is loaded with
            ``image_modeling.load_dataset(TRAIN_PATH, batch_size)``.
        validation_data: Optional validation data. When omitted, it is
            loaded with
            ``image_modeling.load_dataset(EVAL_PATH, batch_size, training=False)``.

    Returns:
        The trained model.

    Note:
        ``image_modeling``, ``TRAIN_PATH``, ``EVAL_PATH`` and
        ``TRAINING_STEPS`` are expected to be defined elsewhere in the
        notebook.
    """
    # Local import: the timestamped log directory below needs it.
    import datetime

    model.compile(
        optimizer="adam",
        # from_logits=True assumes the model emits raw scores (no softmax).
        # CategoricalCrossentropy expects one-hot encoded labels; if the
        # labels are integer class indices, use SparseCategoricalCrossentropy
        # instead. TODO confirm against the label format of the input data.
        loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
        metrics=['accuracy']
    )

    # Fall back to the project loader only for inputs the caller did not supply.
    if train_data is None:
        train_data = image_modeling.load_dataset(TRAIN_PATH, batch_size)
    if validation_data is None:
        validation_data = image_modeling.load_dataset(EVAL_PATH, batch_size, training=False)

    # One log directory per run so TensorBoard can tell runs apart.
    log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

    model.fit(
        train_data,
        validation_data=validation_data,
        steps_per_epoch=TRAINING_STEPS,
        epochs=10,
        callbacks=[tensorboard_callback],
    )

    # Always save the weights once training has finished.
    model.save_weights('first_try.h5')

    return model

Let us use Tensorboard to monitor our results:

In [None]:
%tensorboard --logdir logs/fit

In [None]:
# Write a testing function.
def test(model):
    """Evaluate ``model`` on the test dataset and return the result.

    The test dataset is loaded with
    ``image_modeling.load_dataset(TEST_PATH, batch_size=1, training=False)``.

    Args:
        model: The model to evaluate; must provide ``evaluate``.

    Returns:
        Whatever ``model.evaluate`` returns for the test dataset, so the
        caller can inspect the metrics instead of losing them.
    """
    test_dataset = image_modeling.load_dataset(TEST_PATH, batch_size=1, training=False)
    return model.evaluate(test_dataset)

In [None]:
# Call the testing function for our model
test(model)