# Environment setup

In [1]:
print('Importing pathlib... ',end='')
import pathlib
print('done!')
print('Importing random... ',end='')
import random
print('done!')
print('Importing tqdm... ',end='')
from tqdm import tqdm
print('done!')
print('Importing numpy... ',end='')
import numpy as np
print('done!')
print('Importing sqlite3... ',end='')
import sqlite3
print('done!')
print('Importing tensorflow... ',end='')
import tensorflow as tf
print('done!')
print('Importing keras... ',end='')
from tensorflow import keras
print('done!')


# tf.enable_eager_execution()

print('All libraries imported successfully')

Importing pathlib... done!
Importing random... done!
Importing tqdm... done!
Importing numpy... done!
Importing sqlite3... done!
Importing tensorflow... done!
Importing keras... done!
All libraries imported successfully


In [2]:
DATA_PATH = 'C:/Users/vbfal/projects/halte/data/raw/images'
IMAGE_SIZE = [192, 192]
AUTOTUNE = tf.data.experimental.AUTOTUNE
RANDOM_SEED = 42
TRAIN_SHARE = 0.8

# Function definitions

In [3]:
def get_extension(image_path):
    """Given a string, returns the last 4 characters in lower case"""
    return image_path[-4:].lower()

In [4]:
def preprocess_image(image_path_tensor):
    """Receives a tensor with the path to a JPG image, reads it, resizes and normalizes it.
    Returns tensor with processed image."""
    image = tf.io.read_file(image_path_tensor)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, IMAGE_SIZE)
    image /= 255.0  # normalize to [0,1] range

    return image

# Load data

In [5]:
# Image paths
data_path = pathlib.Path(DATA_PATH)
print('Looking for images in:',data_path,':')

for item in data_path.iterdir():
    print(item)
    
all_image_paths = list(data_path.glob('*/*'))
all_image_paths = [str(path) for path in all_image_paths]
random.seed(RANDOM_SEED)
random.shuffle(all_image_paths)

image_count = len(all_image_paths)
print('found',image_count,'images in path')

Looking for images in: C:\Users\vbfal\projects\halte\data\raw\images :
C:\Users\vbfal\projects\halte\data\raw\images\epee
C:\Users\vbfal\projects\halte\data\raw\images\foil
C:\Users\vbfal\projects\halte\data\raw\images\not_fencing
C:\Users\vbfal\projects\halte\data\raw\images\sabre
found 244 images in path


In [6]:
# labels
label_names = sorted(item.name for item in data_path.glob('*/') if item.is_dir())
print('Labels are: ',end='')
for label in label_names:
    print(label,',',end=' ')
print('')

print('Assigning dictionary to labels')
label_to_index = dict((name, index) for index,name in enumerate(label_names))
label_to_index

Labels are: epee , foil , not_fencing , sabre , 
Assigning dictionary to labels


{'epee': 0, 'foil': 1, 'not_fencing': 2, 'sabre': 3}

In [7]:
all_image_labels = [label_to_index[pathlib.Path(path).parent.name]
                    for path in all_image_paths]

print("First 10 labels indices: ", all_image_labels[:10])

First 10 labels indices:  [0, 2, 0, 1, 2, 3, 3, 2, 3, 0]


# Create dataset

In [8]:
# a different dataset holds the labels
print('Building labels dataset...',end='')
label_ds = tf.data.Dataset.from_tensor_slices(tf.cast(all_image_labels, tf.int64))
print('done.')

# Dataset initially contains only the path strings
print('Building image paths dataset...',end='')
path_ds = tf.data.Dataset.from_tensor_slices(all_image_paths)
print('done.')

# Mapping to preprocessing function creates a dataset of the images
print('Turning image paths dataset into images dataset...',end='')
image_ds = path_ds.map(preprocess_image, num_parallel_calls=AUTOTUNE)
print('All images converted')

# and then images and labels are combined into one dataset
# (remember this can only be done because both are in the same order)
print('Combining images and labels datasets...',end='')
image_label_ds = tf.data.Dataset.zip((image_ds, label_ds))
print('done.')


Building labels dataset...done.
Building image paths dataset...done.
Turning image paths dataset into images dataset...All images converted
Combining images and labels datasets...done.


In [9]:
# Dataset split
dataset_size = len(all_image_paths)
train_size = int(TRAIN_SHARE * dataset_size)

train_ds = image_label_ds.take(train_size)
test_ds = image_label_ds.skip(train_size)

In [10]:
train_ds

<TakeDataset shapes: ((192, 192, 3), ()), types: (tf.float32, tf.int64)>

# Model stages

In [15]:
# Define layers
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(192,192,3)),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(10, activation='softmax')
])

In [16]:
# Define model parameters
model.compile(optimizer='adam', 
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Training

In [17]:
# Training loop
model.fit(train_ds, epochs=5)

W0409 23:15:53.897744 16380 training_utils.py:1353] Expected a shuffled dataset but input dataset `x` is not shuffled. Please invoke `shuffle()` on input dataset.


Epoch 1/5


ValueError: Error when checking input: expected flatten_2_input to have 4 dimensions, but got array with shape (192, 192, 3)

# Evaluation

In [None]:
# Predict on test dataset