# Input Data

This file contains the input-data preprocessing functions that are called by the main function in training.py.

1. BMP to JPEG
2. get_files
3. get_batch

In [1]:
from PIL import Image
import numpy as np
import os
import tensorflow as tf

## 1. BMP to JPEG

Some of our pattern images are in BMP format, so we need to convert them to JPEG.

In [2]:
def BMPtoJPEG(file_dir):
    '''
    Re-save every image in the raw class folders as a JPEG file.

    For each of the 'nail', 'scratch' and 'smear' folders inside file_dir,
    every regular file is opened with PIL and written as JPEG into the
    sibling '<folder>_jpg' folder, which is created if it does not exist.
    The output name is the input name with its extension replaced by '.jpg'.

    Args:
        file_dir: directory that contains the 'nail', 'scratch' and 'smear' folders
    Returns:
        None, the converted images are written to disk
    '''
    # Modes the Pillow JPEG writer accepts as-is; anything else
    # (e.g. palette 'P' or 'RGBA') has to be converted before saving.
    jpeg_modes = ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr')

    dirs = ['nail', 'scratch', 'smear']
    new_dirs = ['nail_jpg', 'scratch_jpg', 'smear_jpg']
    for nd in new_dirs:
        os.makedirs(os.path.join(file_dir, nd), exist_ok=True)

    for d, nd in zip(dirs, new_dirs):
        base = os.path.join(file_dir, d)
        nbase = os.path.join(file_dir, nd)
        for file in os.listdir(base):
            src = os.path.join(base, file)
            # Sub-directories cannot be opened as images, so skip them.
            if not os.path.isfile(src):
                continue
            # splitext swaps only the real extension, regardless of its case,
            # and never touches a '.bmp' that appears in the middle of a name.
            stem, _ = os.path.splitext(file)
            # The context manager closes the source file handle after saving.
            with Image.open(src) as img:
                if img.mode not in jpeg_modes:
                    img = img.convert('RGB')
                img.save(os.path.join(nbase, stem + '.jpg'), 'JPEG')

## 2. get_files

Collect the paths of all pattern images in the directory and keep them in a list. The label of each image is kept in a second, parallel list.

In [3]:
def get_files(file_dir):
    '''
    Collect the image paths and integer labels of every class folder.

    The folders 'nail_jpg', 'scratch_jpg' and 'smear_jpg' inside file_dir
    are labelled 0, 1 and 2 respectively. Within each folder the files are
    visited in sorted name order so the result is the same on every run.

    Args:
        file_dir: file directory
    Returns:
        list of images and labels
    '''
    dirs = ['nail_jpg', 'scratch_jpg', 'smear_jpg']
    image_list = []
    label_list = []
    for label_num, d in enumerate(dirs):
        base = file_dir + '/' + d
        # os.listdir returns entries in arbitrary order; sort for reproducibility.
        for file in sorted(os.listdir(base)):
            path = base + '/' + file
            # Only regular files can be read as images later on.
            if not os.path.isfile(path):
                continue
            image_list.append(path)
            label_list.append(label_num)

    return image_list, label_list

## 3. get_batch

Create batches from the file list we just made.
Apply some image preprocessing, and shuffle the images when forming each batch.

In [4]:
def get_batch(image, label, image_W, image_H, batch_size, capacity):
    '''
    Build a shuffled batch of augmented images and their labels.

    Args:
        image: list of image file paths
        label: list of labels, parallel to image
        image_W: size of the first spatial dimension of the output crop
        image_H: size of the second spatial dimension of the output crop
        batch_size: batch size
        capacity: the maximum elements in queue
    Returns:
        image_batch: 4D tensor [batch_size, image_W, image_H, 3], dtype=tf.float32
        label_batch: 1D tensor [batch_size], dtype=tf.int32
    '''
    path_tensor = tf.cast(image, tf.string)
    label_tensor = tf.cast(label, tf.int32)

    # Input queue that yields one (path, label) pair at a time.
    path_item, label_item = tf.train.slice_input_producer([path_tensor, label_tensor])

    raw_bytes = tf.read_file(path_item)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)

    # Centre crop/pad to a canvas 30 pixels larger than the target on each
    # axis so the random crop below has room to move. A plain
    # tf.image.resize_images(pixels, [image_W, image_H]) is the alternative.
    # NOTE(review): image_W is passed in the first (height) position here and
    # in the crop size; confirm this matches the callers' intent.
    margin = 30
    pixels = tf.image.resize_image_with_crop_or_pad(pixels, image_W + margin, image_H + margin)

    # Random augmentation: crop, flips, then colour jitter.
    pixels = tf.random_crop(pixels, [image_W, image_H, 3])
    pixels = tf.image.random_flip_left_right(pixels)
    pixels = tf.image.random_flip_up_down(pixels)
    # Brightness/saturation/contrast provides small gains .2%~.5% on cifar.
    pixels = tf.image.random_brightness(pixels, max_delta=64. / 255.)
    pixels = tf.image.random_saturation(pixels, lower=0.5, upper=1.5)
    pixels = tf.image.random_contrast(pixels, lower=0.2, upper=1.8)

    # Disable the standardization step below when running inference.
    # This is the tf 1.0 name; tf 0.11 called it per_image_whitening.
    pixels = tf.image.per_image_standardization(pixels)

    # tf.train.batch (same batch_size, num_threads and capacity, without
    # min_after_dequeue) can be used instead when shuffling is not wanted.
    # NOTE(review): min_after_dequeue is capacity - 1, which leaves a single
    # slot of headroom in the queue; confirm this is intended.
    batches = tf.train.shuffle_batch([pixels, label_item],
                                     batch_size=batch_size,
                                     num_threads=64,
                                     capacity=capacity,
                                     min_after_dequeue=capacity - 1)
    image_batch, label_batch = batches

    label_batch = tf.reshape(label_batch, [batch_size])
    image_batch = tf.cast(image_batch, tf.float32)

    return image_batch, label_batch
