### Preparing code for running on AWS SageMaker.
We have uploaded our training and test data to an AWS S3 bucket: galaxyimages/training, galaxyimages/test.
Test the code on a vanilla CNN first, before training the deep CNN with transfer learning.

In [None]:
import boto3
import re
import sagemaker
from sagemaker import get_execution_role
from sagemaker.tensorflow import TensorFlow

# IAM role the notebook is running under.
role = get_execution_role()

# Session, its default bucket, and the key prefixes used by this notebook.
sage_maker_session = sagemaker.Session()
bucket = sage_maker_session.default_bucket()
prefix = 'sagemaker/deep-galaxy-training'
key = 'galaxyimages_mirror'

# S3 URI of the training images: s3://<bucket>/<key>/training/
train_input_path = f's3://{bucket}/{key}/training/'

# Echo the resolved values, one per line.
print(bucket, role, train_input_path, sep='\n')

We need to put the customised code in a Docker container; otherwise SageMaker cannot load the modules.

In [None]:
%%writefile train_vanilla_cnn.py
# ok no modules called keras..

import os
import argparse
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import models
from tensorflow.keras import layers


if __name__ == '__main__':
    # Training entry point: parse hyperparameters, build train/validation
    # generators from the 'training' channel, fit a small CNN and export it.

    parser = argparse.ArgumentParser()
    print(os.environ['SM_CHANNEL_TRAINING'])
    # The notebook only supplies a 'training' channel to fit(), so the
    # validation/test channel variables may be absent; .get() avoids a KeyError.
    print(os.environ.get('SM_CHANNEL_VALIDATION'))
    parser.add_argument('--epochs', type=int, default=10)
    # Both the dashed and the underscored spelling are accepted so that a
    # hyperparameter key such as 'batch_size' is not silently dropped by
    # parse_known_args().
    # The learning rate used to be parsed but never applied, so training ran
    # with Adam's default (0.001). The default here is set to that value so
    # jobs that do not pass the flag behave as before.
    parser.add_argument('--learning-rate', '--learning_rate', type=float, default=0.001)
    parser.add_argument('--batch-size', '--batch_size', type=int, default=32)
    parser.add_argument('--gpu-count', '--gpu_count', type=int, default=os.environ['SM_NUM_GPUS'])
    # NOTE(review): no '--model_dir' alias on purpose - presumably the
    # estimator passes an S3 URI under that name; confirm before adding one.
    parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
    parser.add_argument('--training', type=str, default=os.environ['SM_CHANNEL_TRAINING'])
    parser.add_argument('--test', type=str, default=os.environ.get('SM_CHANNEL_TEST'))

    args, _ = parser.parse_known_args()
    epochs = args.epochs
    lr = args.learning_rate
    batch_size = args.batch_size
    model_dir = args.model_dir
    training_dir = args.training

    num_classes = 3
    image_resize = 200
    validation_split = 0.12
    batch_size_training = batch_size
    batch_size_validation = batch_size

    # The training generator must share the validation generator's split;
    # without it, subset='training' spans every image, including the ones
    # held out for validation.
    train_datagen = ImageDataGenerator(rescale=1./255,
                                       shear_range=0.2,
                                       zoom_range=0.2,
                                       rotation_range=140,
                                       horizontal_flip=True,
                                       vertical_flip=True,
                                       validation_split=validation_split)

    # Shuffle the training images (seeded) so batches are not served in
    # directory order.
    training_set = train_datagen.flow_from_directory(training_dir,
                                                     target_size=(image_resize, image_resize),
                                                     batch_size=batch_size_training,
                                                     seed=100,
                                                     subset='training',
                                                     shuffle=True,
                                                     class_mode='categorical')

    STEP_SIZE_TRAIN = training_set.n // training_set.batch_size

    # Validation images are only rescaled, never augmented.
    valid_datagen = ImageDataGenerator(rescale=1./255,
                                       validation_split=validation_split)

    valid_set = valid_datagen.flow_from_directory(training_dir,
                                                  target_size=(image_resize, image_resize),
                                                  batch_size=batch_size_validation,
                                                  seed=100,
                                                  subset='validation',
                                                  shuffle=False,
                                                  class_mode='categorical')
    STEP_SIZE_VALID = valid_set.n // valid_set.batch_size

    # Two conv/max-pool stages followed by a dense classifier head.
    cnn = models.Sequential()
    cnn.add(layers.Conv2D(filters=32, kernel_size=3, activation='relu', input_shape=[image_resize, image_resize, 3]))
    cnn.add(layers.MaxPool2D(pool_size=2, strides=2))
    cnn.add(layers.Conv2D(filters=36, kernel_size=3, activation='relu'))
    cnn.add(layers.MaxPool2D(pool_size=2, strides=2))
    cnn.add(layers.Flatten())
    cnn.add(layers.Dense(units=128, activation='relu'))
    cnn.add(layers.Dropout(0.5))
    cnn.add(layers.Dense(units=num_classes, activation='softmax'))
    cnn.compile(optimizer=keras.optimizers.Adam(learning_rate=lr),
                loss='categorical_crossentropy',
                metrics=['accuracy'])
    # summary() prints the table itself; wrapping it in print() adds "None".
    cnn.summary()

    fit_results = cnn.fit(x=training_set,
                          steps_per_epoch=STEP_SIZE_TRAIN,
                          validation_data=valid_set,
                          validation_steps=STEP_SIZE_VALID,
                          epochs=epochs
                          )

    # Export the trained network under a numbered version sub-directory of
    # model_dir; without an export, a later deploy() has no model to serve.
    cnn.save(os.path.join(model_dir, '1'))
    

In [None]:
# tf_version = tf.__version__

In [None]:
import sagemaker
from sagemaker.tensorflow import TensorFlow

# Set to False when running outside a SageMaker notebook instance, and
# replace the placeholder role below with a real one.
ON_SAGEMAKER_NOTEBOOK = True

sagemaker_session = sagemaker.Session()
if ON_SAGEMAKER_NOTEBOOK:
    role = sagemaker.get_execution_role()
else:
    role = 'your_role'

train_instance_type = 'ml.m5.large'      # The type of EC2 instance which will be used for training

# Hyperparameter keys must match the flags that train_vanilla_cnn.py declares
# ('--epochs', '--batch-size'). The previous key 'batch_size' did not match
# '--batch-size', and the script's parse_known_args() discarded it silently.
tf_estimator = TensorFlow(
                          entry_point='train_vanilla_cnn.py',          # our own script
                          role=role,
                          framework_version='2.3.0',
                          hyperparameters={'epochs': 3,
                                           'batch-size': 32
                                           },
                          py_version='py37',
                          script_mode=True,
                          instance_count=1,
                          instance_type=train_instance_type
                          )

In [None]:
# Rebuild the S3 URI of the training images from the bucket and key, then echo it.
train_input_path = f's3://{bucket}/{key}/training/'
print(train_input_path)

In [None]:
print("Training ...")
# Single input channel named 'training', pointing at the S3 training prefix.
training_channels = {'training': train_input_path}
tf_estimator.fit(training_channels)

In [None]:
import time

# Endpoint name = fixed 'galaxyimages' prefix + current UTC timestamp.
deploy_timestamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
endpoint_name = 'galaxyimages' + deploy_timestamp

# Deploy the trained estimator to one ml.m5.4xlarge instance under that name.
end_point = tf_estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.m5.4xlarge',
    endpoint_name=endpoint_name,
)


In [None]:
print("Deploying ...")
# deploy_instance_type was referenced but never defined in this notebook
# (NameError). Use the same instance type as the named deployment cell.
# NOTE(review): the named deployment cell above already creates an endpoint;
# this call creates another one - confirm both are wanted.
deploy_instance_type = 'ml.m5.4xlarge'
predictor = tf_estimator.deploy(initial_instance_count=1, instance_type=deploy_instance_type)

# endpoint_name is the SDK v2 attribute name (the estimator above already uses
# v2 arguments such as instance_count).
print("Predictor endpoint name : %s" % predictor.endpoint_name)

In [None]:
# check model performance
# Scores the deployed endpoint on the held-out test images and prints a
# confusion matrix and a per-class classification report.
import numpy as np
from sagemaker.s3 import S3Downloader
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.preprocessing.image import ImageDataGenerator


test_input_path = "s3://{}/{}/test/".format(bucket, key)

# flow_from_directory() reads from the local filesystem, not from S3, so the
# test images are copied down first.
local_test_dir = 'galaxyimages_test'
S3Downloader.download(test_input_path, local_test_dir)


test_datagen = ImageDataGenerator(rescale = 1./255)
test_set = test_datagen.flow_from_directory(local_test_dir,
                                            target_size = (200, 200),
                                            batch_size = 32,
                                            seed = 10,
                                            shuffle = False,
                                            class_mode = 'categorical')

test_set.reset()


# Predict
# The predictor returned by deploy() is used directly. A Keras iterator cannot
# be serialised, so each image is sent in the TensorFlow Serving REST format
# ({'instances': [...]}) and the scores are read from 'predictions'.
# NOTE(review): one image per request keeps the JSON payload small; real-time
# endpoints cap the request size - confirm the limit before batching more.
scores = []
for batch_index in range(len(test_set)):
    images, _ = test_set[batch_index]
    for image in images:
        response = predictor.predict({'instances': [image.tolist()]})
        scores.extend(response['predictions'])

Y_pred = np.array(scores)
y_pred = np.argmax(Y_pred, axis=1)


# shuffle=False above keeps test_set.classes aligned with the prediction order.
cm = confusion_matrix(test_set.classes, y_pred)

print('The confusion matrix is \n{}\n'.format(cm))

f1 = classification_report(test_set.classes, y_pred, target_names = list(test_set.class_indices))
print('F1 score is {}\n'.format(f1))

### Clean up by deleting the endpoint


In [None]:
# Delete the endpoint that `predictor` refers to.
# NOTE(review): the endpoint created earlier via
# tf_estimator.deploy(..., endpoint_name=endpoint_name) is held in `end_point`
# and is not deleted by this call - confirm it is cleaned up elsewhere.
cleanup_session = sagemaker.Session()
cleanup_session.delete_endpoint(predictor.endpoint)