## Install dependencies



In [None]:
# Install the packages listed in requirements.txt (%pip targets the running kernel's environment).
%pip install -r requirements.txt

## Upload data

Group the images by class: put each class in its own folder, then place all class folders inside a single parent folder.

Ex.
```
data/ 
  │
  └─── class1/
  │        │
  |        └─── image1.png
  │        └─── image2.jpg
  |        └─── ...
  │   
  └─── class2/
  │        │
  |        └─── image123.png
  │        └─── image456.jpg
  |        └─── ...
  |
  └─── .../
           │
           └─── ...
```





## Remove unnecessary files

Only files whose detected image type is listed in `image_exts` are kept; every other file is deleted.

In [1]:
import cv2, imghdr, os
import numpy as np

In [2]:
# Root folder that holds one sub-folder per class.
data_dir = './data'

# Image types that survive the clean-up step; anything else is deleted.
image_exts = [
    'jpeg',
    'jpg',
    'bmp',
    'png',
]

In [3]:
# Walk data_dir/<class>/<file> and delete every file whose detected image
# type is not listed in image_exts.
# NOTE(review): imghdr is deprecated since Python 3.11 and removed in 3.13.
for image_class in os.listdir(data_dir):
    class_dir = os.path.join(data_dir, image_class)
    # Stray files next to the class folders (e.g. .DS_Store, Thumbs.db) are
    # skipped; calling os.listdir() on them would raise NotADirectoryError.
    if not os.path.isdir(class_dir):
        continue

    for image in os.listdir(class_dir):
        image_path = os.path.join(class_dir, image)
        try:
            tip = imghdr.what(image_path)
            if tip not in image_exts:
                print(f'Image not in ext list {image_path}')
                os.remove(image_path)
            elif cv2.imread(image_path) is None:
                # cv2.imread reports an undecodable file by returning None
                # instead of raising, so the result has to be checked.
                print(f'Issue with image {image_path}')
        except Exception as e:
            # Best effort: report the problem (with its cause) and move on.
            print(f'Issue with image {image_path}: {e}')

## Configure settings

Apply all the settings here for preprocessing, building, and training the neural network.

In [4]:
IMAGE_SIZE = (256, 256)  # Square sizes are recommended for stability
EPOCHS = 20
# Was an Ellipsis placeholder, which would break any consumer. 32 is the batch
# size the dataset loader in this notebook actually produces (it is called
# without an explicit batch_size and its output shows batches of 32).
BATCH_SIZE = 32
KERNEL_SIZE = (3, 3)
STRIDES = 1

# Ratio for splitting dataset
TRAIN_VAL = 0.7
VALID_VAL = 0.2
TEST_VAL = 0.1

# Fail early on a typo in the ratios (tolerance absorbs float rounding).
assert abs(TRAIN_VAL + VALID_VAL + TEST_VAL - 1.0) < 1e-9, 'split ratios must sum to 1'

## Prepare, randomize, and normalize the images

Load the images with `tf.keras.utils.image_dataset_from_directory`, resizing them to `IMAGE_SIZE` (256 × 256 by default), then scale the pixel values to the range 0–1.

See https://www.tensorflow.org/api_docs/python/tf/keras/utils/image_dataset_from_directory

In [5]:
import tensorflow as tf

In [6]:
def _scale_to_unit_range(images, labels):
    """Map pixel values from [0, 255] to [0, 1]; labels pass through unchanged."""
    return images / 255, labels


# Images are resized to IMAGE_SIZE and shuffled by the loader.
raw_data = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    image_size=IMAGE_SIZE,
    shuffle=True,
)
# class_names has to be read here: the mapped dataset below does not carry it.
class_names = raw_data.class_names
data = raw_data.map(_scale_to_unit_range)

Found 171 files belonging to 2 classes.


In [7]:
# Look at a single batch to confirm the tensor shapes.
for image_batch, labels_batch in data.take(1):
    print(image_batch.shape)
    print(labels_batch.shape)

(32, 256, 256, 3)
(32,)


## Split the input and output pairs for training

Randomly split input and output pairs into sets of data: 70% for training, 20% for validation, and 10% for testing.

  - the training set is used to train the model
  - the validation set is used to measure how well the model is performing during training
  - the testing set is used to test the model after training

In [8]:
# `data` is reshuffled every time it is iterated, so take()/skip() taken on
# separate passes could pick overlapping images (test data leaking into the
# training set). Caching and doing one full pass freezes a single order;
# Dataset.cache() replays exactly the same elements on later iterations.
data_frozen = data.cache()
for _ in data_frozen:
    pass

num_batches = len(data_frozen)

# Derive the sizes from cumulative boundaries so they always add up to
# num_batches. Rounding each share independently can overshoot
# (e.g. 8 batches -> 6 + 2 + 1 = 9).
train_end = round(num_batches * TRAIN_VAL)
val_end = max(train_end, round(num_batches * (TRAIN_VAL + VALID_VAL)))

train_size = train_end
val_size = val_end - train_end
test_size = num_batches - val_end
assert train_size + val_size + test_size == num_batches, f'sum must be {num_batches}'

if min(train_size, val_size, test_size) == 0:
    print(
        f'Warning: empty split (train={train_size}, val={val_size}, test={test_size} batches); '
        'add more images or adjust the split ratios.'
    )

train = data_frozen.take(train_size)
val = data_frozen.skip(train_size).take(val_size)
test = data_frozen.skip(train_size + val_size).take(test_size)


## Configure the dataset for performance

* [Dataset.cache](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#cache) keeps the images in memory after they're loaded off disk during the first epoch. This will ensure the dataset does not become a bottleneck while training your model. If your dataset is too large to fit into memory, you can also use this method to create a performant on-disk cache.

* [Dataset.prefetch](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#prefetch) overlaps data preprocessing and model execution while training.

In [9]:
AUTOTUNE = tf.data.AUTOTUNE

# Training batches: cache once, then reshuffle the cached batches each epoch.
train_cached = train.cache()
train = train_cached.shuffle(1000).prefetch(buffer_size=AUTOTUNE)

# Validation and test batches: cache and prefetch only, no shuffling.
val, test = (
    split.cache().prefetch(buffer_size=AUTOTUNE)
    for split in (val, test)
)

## Build & Train the Model

Build and train a [TensorFlow](https://www.tensorflow.org) model using the high-level [Keras](https://www.tensorflow.org/guide/keras) API.

### Build the neural network

In [10]:
num_classes = len(class_names)

# Options shared by every convolution layer.
conv_options = dict(
    kernel_size=KERNEL_SIZE,
    strides=STRIDES,
    padding='same',
    activation='relu',
)

# Three conv + max-pool stages with 16, 32 and 64 filters, followed by a
# dense classifier head with one softmax output per class.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(16, input_shape=(*IMAGE_SIZE, 3), **conv_options),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(32, **conv_options),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(64, **conv_options),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])

# Labels are integer class indices, hence the sparse cross-entropy loss.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 256, 256, 16)      448       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 128, 128, 16)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 128, 128, 32)      4640      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 64, 64, 32)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 64, 64, 64)        18496     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 32, 32, 64)       0

### Train

In [14]:
# Use the EPOCHS setting instead of a hard-coded 20 so the configuration cell
# actually controls the training length.
history = model.fit(train, epochs=EPOCHS, validation_data=val)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


## Evaluate with test data

In [29]:
from tabulate import tabulate

# Separate the test batches into inputs and labels.
# NOTE(review): this relies on `test` yielding its batches in the same order on
# every pass, so that predictions and labels line up.
points = test.map(lambda x, y: x)
labels = test.map(lambda x, y: y)
test_yhat = model.predict(points)
test_yhat = tf.math.argmax(test_yhat, -1)  # index of the most probable class

# Print the predictions and the expected outputs.
# Gather the labels of ALL test batches; taking only the first batch would
# silently truncate the table when the test set spans several batches.
expected = np.concatenate(list(labels.as_numpy_iterator()))
result = list(zip(np.array(test_yhat), expected))
delta = [bool(predicted == actual) for predicted, actual in result]
table = [(predicted, actual, is_match) for (predicted, actual), is_match in zip(result, delta)]

print(tabulate(table, headers=["Predictions", "Expected", "Result"], tablefmt="psql"))

loss, acc = model.evaluate(test)
print(f'Model loss (Test set): {loss}')
print(f'Model Accuracy (Test set): {acc}')

+---------------+------------+----------+
|   Predictions |   Expected | Result   |
|---------------+------------+----------|
|             0 |          0 | True     |
|             0 |          1 | False    |
|             0 |          0 | True     |
|             0 |          0 | True     |
|             0 |          0 | True     |
|             0 |          0 | True     |
|             0 |          0 | True     |
|             0 |          0 | True     |
|             0 |          0 | True     |
|             1 |          1 | True     |
|             0 |          0 | True     |
+---------------+------------+----------+
Model loss (Test set): 0.34396740794181824
Model Accuracy (Test set): 0.9090909361839294


## Save the model

Save the trained model so that it can later be tested on brand-new images that are not in `data_dir`.

### .h5

In [16]:
# Persist the trained Keras model as a single HDF5 file.
h5_model_path = './models/model.h5'
model.save(h5_model_path)

### .tflite

In [17]:
# Convert the model to the TensorFlow Lite format
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# Optional post-training quantization. tf.lite.Optimize.OPTIMIZE_FOR_SIZE is
# deprecated and behaves like DEFAULT, so DEFAULT is the one to enable:
# converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# Save the model to disk. The context manager closes the file handle, which
# the bare open(...).write(...) call never did.
os.makedirs("./models", exist_ok=True)
with open("./models/model.tflite", "wb") as tflite_file:
    tflite_file.write(tflite_model)



INFO:tensorflow:Assets written to: C:\Users\Bahillo\AppData\Local\Temp\tmpc7mjmn8s\assets


INFO:tensorflow:Assets written to: C:\Users\Bahillo\AppData\Local\Temp\tmpc7mjmn8s\assets


33654376

#### Optional
For edge devices, see [supported platforms](https://www.tensorflow.org/lite/microcontrollers#supported_platforms)

If the device requires the model as a C header file (`.h`), generate one from the TFLite model with the cell below.





In [18]:
def hex_to_c_array(hex_data: bytes, var_name: str = 'model') -> str:
    """Render a byte sequence as the source text of a C ``unsigned char`` array.

    Args:
        hex_data: The bytes to embed (any sized sequence of ints in 0-255).
        var_name: Name of the generated C array. Defaults to ``'model'``,
            which reproduces the previously hard-coded declaration.

    Returns:
        A string of the form ``unsigned char <var_name>[] = {\\n 0x.., ...};``
        with 12 byte literals per line so each line stays within 80 characters.
    """
    total = len(hex_data)
    tokens = []

    for position, byte in enumerate(hex_data, start=1):
        # Two-digit, 0x-prefixed literal, e.g. 0x0a
        token = format(byte, '#04x')

        # Every literal except the last one is followed by a comma
        if position < total:
            token += ','
        # Break the line after every 12th literal
        if position % 12 == 0:
            token += '\n'
        tokens.append(token)

    # Joining with a single space also indents each continuation line by one
    # space, because the separator lands right after every '\n'.
    return f'unsigned char {var_name}[] = {{' + '\n ' + ' '.join(tokens) + '};'

# Write the TFLite model out as a C header for targets that need the model in source form.
with open('./models/model.h', 'w') as header_file:
    header_source = hex_to_c_array(tflite_model)
    header_file.write(header_source)

## Test new images

### Load the model using the **.h5** file

In [36]:
# Restore the Keras model from the HDF5 file.
saved_model_path = './models/model.h5'
loaded_model = tf.keras.models.load_model(saved_model_path)

### Test the result

In [42]:
img = tf.keras.utils.load_img('happytest.jpg', target_size=IMAGE_SIZE) # replace with your file name here
img_array = tf.keras.utils.img_to_array(img)
img_array = tf.expand_dims(img_array/255, 0) # Scale from 0 to 1 and create a batch

# Predict with the model restored from disk, so this cell really tests the
# saved .h5 file rather than the in-memory training model.
yhat = loaded_model.predict(img_array)
score = yhat[0]  # class probabilities for the single image in the batch

print(score)

print(
    "This image most likely belongs to {} with a {:.2f} percent confidence."
    .format(class_names[np.argmax(score)], 100 * np.max(score))
)

[1.000000e+00 7.235012e-10]
This image most likely belongs to happy with a 100.00 percent confidence.
