In [1]:
import tensorflow as tf
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from functools import partial
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense, Conv2D, MaxPool2D
from datetime import datetime
from keras.preprocessing import image

Using TensorFlow backend.


In [2]:
# The TFRecord shards are expected in a `tfrecords` folder inside the current
# working directory. os.path.join is used instead of manual '/' concatenation.
cwd = os.getcwd()
tfrecord_files_dir = os.path.join(cwd, 'tfrecords')

In [3]:
# Split the directory listing into train and test shard paths, keyed on the
# first four characters of each file name. The listing order is preserved.
tfrecord_files = os.listdir(tfrecord_files_dir)
full_train_tfrecords = [
    tfrecord_files_dir + '/' + fname
    for fname in tfrecord_files
    if fname.startswith('trai')
]
test_tfrecords = [
    tfrecord_files_dir + '/' + fname
    for fname in tfrecord_files
    if fname.startswith('test')
]

In [4]:
# Display the collected test shard paths as a sanity check
test_tfrecords

['/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/test14-687.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/test15-677.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/test10-687.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/test11-687.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/test12-687.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/test13-687.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/test01-687.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/test00-687.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/test03-687.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/test02-687.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/test05-687.tfrec',
 '/sfs/lustre/bahamut/scratch/uk7ud/Kaggle Melanoma/tfrecords/test04-687.tfrec',
 '/sfs/lustre/bahamut/scratc

In [5]:
# Define functions to parse the test TFRecords and build the test dataset

def get_test_images(tfrecord):
    """Turn one serialized test example into a rescaled image and its name.

    Args:
        tfrecord: Serialized example handed to ``tf.io.parse_single_example``.
            It is parsed with an ``image`` bytes feature and an ``image_name``
            string feature (empty string when the name is missing).

    Returns:
        Tuple ``(image, image_name)``. The image is decoded, reshaped to
        1024x1024x3, resized with nearest-neighbour sampling to the first two
        entries of the module-level ``input_shape``, and divided by 255.
    """
    feature_spec = {
        "image": tf.io.VarLenFeature(tf.string),
        "image_name": tf.io.FixedLenFeature([], tf.string, default_value=""),
    }
    parsed = tf.io.parse_single_example(tfrecord, feature_spec)

    # The variable-length feature exposes its payload through `.values`;
    # the encoded image bytes are the first (and presumably only) entry.
    encoded = parsed["image"].values[0]

    # Pin the shape explicitly; this assumes every record stores a
    # 1024x1024 RGB image (TODO confirm against the dataset).
    pixels = tf.reshape(tf.io.decode_image(encoded), shape=[1024, 1024, 3])

    # `input_shape` is a notebook-level global looked up when the function runs.
    target_size = input_shape[:2]
    resized = tf.image.resize(pixels, target_size, method='nearest')
    return resized/255, parsed["image_name"]

def create_test_dataset(filepaths, n_read_threads=5, n_parse_threads=5, batch_size=1):
    """Build the batched, prefetched test pipeline from TFRecord files.

    Args:
        filepaths: TFRecord file path(s) passed to ``tf.data.TFRecordDataset``.
        n_read_threads: Value used for ``num_parallel_reads`` when reading files.
        n_parse_threads: Value used for ``num_parallel_calls`` when mapping
            ``get_test_images`` over the records.
        batch_size: Number of parsed examples grouped into each batch.

    Returns:
        A dataset yielding ``(image, image_name)`` batches, with one batch
        prefetched ahead of the consumer.
    """
    records = tf.data.TFRecordDataset(filepaths, num_parallel_reads=n_read_threads)
    return (
        records
        .map(get_test_images, num_parallel_calls=n_parse_threads)
        .batch(batch_size)
        .prefetch(1)
    )

In [7]:
# Target image size. The first two entries are what get_test_images passes to
# tf.image.resize; the third is not used by the resize call and is presumably
# the channel count the model expects (TODO confirm).
input_shape = [300, 300, 3]

In [8]:
# Build the test pipeline with the function defaults (batch_size=1, i.e. one image per batch)
test_set = create_test_dataset(test_tfrecords)

In [9]:
# Show the dataset summary (element shapes and dtypes)
test_set

<PrefetchDataset shapes: ((None, 300, 300, 3), (None,)), types: (tf.float32, tf.string)>

In [10]:
# Load the saved Keras model. The path is relative, so the file must sit in
# the directory the notebook is run from.
model = keras.models.load_model('my_model_tfrecords.h5')

In [51]:
# Module-level accumulators, kept so that code reading them after a call still
# works. get_predictions() overwrites their contents on every call instead of
# appending, so re-running the prediction cell does not duplicate rows.
test_images = []
predictions = []

def get_predictions(tfrecords, classifier=None, output_path=None):
    """Predict a binary class for every image in a batched dataset and save a CSV.

    Args:
        tfrecords: Iterable of ``(images, image_names)`` batches. ``images`` is
            passed to ``predict`` as-is, and ``image_names.numpy()`` must yield
            UTF-8 encoded byte strings, one per image in the batch.
        classifier: Object with a Keras-style ``predict`` method. Defaults to
            the notebook-level ``model``.
        output_path: Destination CSV file. Defaults to ``predictions.csv``
            inside the notebook-level ``cwd``.

    Returns:
        DataFrame with columns ``image_name`` and ``target``, sorted by
        ``image_name``. The same table is written to ``output_path``.
    """
    # Resolve the notebook globals only when no explicit override is given.
    clf = model if classifier is None else classifier
    csv_path = os.path.join(cwd, 'predictions.csv') if output_path is None else output_path

    names = []
    labels = []
    for images, image_names in tfrecords:
        # `Sequential.predict_classes` is deprecated in newer TensorFlow
        # releases. For a single-unit output it is equivalent to thresholding
        # `predict` at 0.5, which is done here directly.
        probs = np.asarray(clf.predict(images, verbose=0))
        batch_labels = (probs > 0.5).astype('int32').reshape(len(probs), -1)[:, 0]

        # Keep every element of the batch, not only the first one, so the
        # function stays correct when the dataset uses batch_size > 1.
        names.extend(raw.decode('utf-8') for raw in image_names.numpy())
        labels.extend(batch_labels.tolist())

    # Refresh the module-level lists in place (no `global` rebinding needed).
    test_images[:] = names
    predictions[:] = labels

    predictions_d = pd.DataFrame({'image_name': names, 'target': labels})
    predictions_df = predictions_d.sort_values(by=['image_name'])

    predictions_df.to_csv(csv_path, index=False, header=True)

    return predictions_df

In [52]:
# Run inference over the whole test set; by default this also writes predictions.csv into `cwd`
predictions_tfrecords = get_predictions(test_set)

In [53]:
# Sum of the `target` column, i.e. the number of positive predictions,
# assuming the column holds 0/1 class labels.
predictions_tfrecords['target'].to_numpy().sum()

4854