In [1]:
import tensorflow as tf
import cv2
import glob
import random
import numpy as np
import pandas as pd
import csv
import time
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# Preprocess masks

### marker-to-mask converter

In [2]:
def make_mask(marker_string, nv, nh):
    # initial mask is empty 1D array
    mask = np.zeros(nv * nh)
    
    # if markers exist, add detect part according to markers
    if len(marker_string) > 0:
        markers = np.array(marker_string.split(' ')).reshape(-1, 2)
        for marker in markers:
            start = int(marker[0])
            length = int(marker[1])
            mask[start: start+length] = 1.0
            
    mask = np.reshape(mask, (nv, nh))

    return mask            

### extract the first N image IDs and their masks

In [3]:
N = 40 # read the top N rows from file => will get N/4 masks
D0 = 256
D1 = 1600

train_mask_file = "data/train.csv"

with open(train_mask_file, "r", newline="") as f:
    reader = csv.reader(f)
    header = next(reader)
    print("CSV header: {}".format(header))
    
    masks = dict()
    for i, row in enumerate(reader):
        if i < N:
            # read image id and defect type
            img_id, defect_type = row[0].split('.jpg_')
        
            # process mask
            mask_marker_string = row[1]
            mask = make_mask(mask_marker_string, D0, D1)
        
            if img_id in masks:
                # if "masks" already has this image's mask(s), add this mask to the right channel
                masks[img_id][:, :, int(defect_type) - 1] = mask
            else:
                # if "masks" doesn't contain this image's info, create a 4-channel mask with zeros
                masks[img_id] = np.zeros((D0, D1, 4))  # dict({defect_type: mask})

CSV header: ['ImageId_ClassId', 'EncodedPixels']


In [4]:
# random.seed(2)
# img_id_0 = random.choice(list(masks.keys()))
# print(img_id_0)
# print(masks[img_id_0])

# Preprocess training images

## Import images

In [5]:
train_img_dir = 'data/train_images/'
train_img_suffix = '.jpg'

def get_image(img_id):
    image_path = train_img_dir + img_id + train_img_suffix
    print(image_path)
    # import image
    image = mpimg.imread(image_path)
    image = image / 255.0
    return image

## Create dataset

In [6]:
image_list = []
mask_list = []
serial_list = []
for serial, mask in masks.items():
    image = get_image(serial)
    image_list = image_list + [image]
    mask_list = mask_list + [mask]
    serial_list = serial_list + [serial]

ds = tf.data.Dataset.from_tensor_slices((np.array(image_list), np.array(mask_list)))

data/train_images/0002cc93b.jpg
data/train_images/00031f466.jpg
data/train_images/000418bfc.jpg
data/train_images/000789191.jpg
data/train_images/0007a71bf.jpg
data/train_images/000a4bcdd.jpg
data/train_images/000f6bf48.jpg
data/train_images/0014fce06.jpg
data/train_images/001982b08.jpg
data/train_images/001d1b355.jpg


# Set up model

## Create stack

In [7]:
base_model = tf.keras.applications.MobileNetV2(input_shape=[D0, D1, 3], include_top=False)

# Use the activations of these layers
layer_names = [
    'block_1_expand_relu',   # 64x64
    'block_3_expand_relu',   # 32x32
    'block_6_expand_relu',   # 16x16
    'block_13_expand_relu',  # 8x8
    'block_16_project',      # 4x4
]
layers = [base_model.get_layer(name).output for name in layer_names]

# Create the feature extraction model
stack = tf.keras.Model(inputs=base_model.input, outputs=layers)

stack.trainable = True

## Define model

In [8]:
def unet_model(output_channels):

    # This is the last layer of the model
    last = tf.keras.layers.Conv2DTranspose(
        output_channels, 3, strides=2,
        padding='same', activation='softmax')  #64x64 -> 128x128

    inputs = tf.keras.layers.Input(shape=[D0, D1, 3])
    x = inputs

    # Downsampling through the model
    skips = stack(x)
    x = skips[-1]
    skips = reversed(skips[:-1])

    x = last(x)

    return tf.keras.Model(inputs=inputs, outputs=x)

## Configure training

In [9]:
model = unet_model(4)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [10]:
def create_mask(pred_mask):
    pred_mask = tf.argmax(pred_mask, axis=-1)
    pred_mask = pred_mask[..., tf.newaxis]
    return pred_mask[0]

In [11]:
model.predict(ds)

ValueError: Error when checking input: expected input_2 to have 4 dimensions, but got array with shape (256, 1600, 3)