## images in the subfolder

In [1]:
## images in the subfolder
import os

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator


In [2]:
img_height = 28
img_width = 28
batch_size = 2


In [3]:
model = keras.Sequential(
    [
        layers.Input((28, 28, 1)),
        layers.Conv2D(16, 3, padding="same"),
        layers.Conv2D(32, 3, padding="same"),
        layers.MaxPooling2D(),
        layers.Flatten(),
        layers.Dense(10),
    ]
)


In [4]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [5]:
data_path = "/content/gdrive/MyDrive/Deep Learning/Basics/tutorial18-customdata-images/"

In [6]:
#                      METHOD 1
# ==================================================== #
#             Using dataset_from_directory             #
# ==================================================== #
ds_train = tf.keras.preprocessing.image_dataset_from_directory(
    data_path + "data/mnist_subfolders/",
    labels="inferred",
    label_mode="int",  # categorical, binary
    # class_names=['0', '1', '2', '3', ...]
    color_mode="grayscale",
    batch_size=batch_size,
    image_size=(img_height, img_width),  # reshape if not in this size
    shuffle=True,
    seed=123,
    validation_split=0.1,
    subset="training",
)

ds_validation = tf.keras.preprocessing.image_dataset_from_directory(
    data_path + "data/mnist_subfolders/",
    labels="inferred",
    label_mode="int",  # categorical, binary
    # class_names=['0', '1', '2', '3', ...]
    color_mode="grayscale",
    batch_size=batch_size,
    image_size=(img_height, img_width),  # reshape if not in this size
    shuffle=True,
    seed=123,
    validation_split=0.1,
    subset="validation",
)


Found 50 files belonging to 10 classes.
Using 45 files for training.
Found 50 files belonging to 10 classes.
Using 5 files for validation.


In [7]:
def augment(x, y):
    image = tf.image.random_brightness(x, max_delta=0.05)
    return image, y


In [9]:
ds_train = ds_train.map(augment)

In [10]:
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=[keras.losses.SparseCategoricalCrossentropy(from_logits=True),],
    metrics=["accuracy"],
)


In [11]:
model.fit(ds_train, epochs=1, verbose=2)

23/23 - 18s - loss: 69.0404 - accuracy: 0.1778 - 18s/epoch - 794ms/step


<keras.callbacks.History at 0x7f5922ab0430>

In [12]:
#                           METHOD 2
# ================================================================== #
#             ImageDataGenerator and flow_from_directory             #
# ================================================================== #

datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=5,
    zoom_range=(0.95, 0.95),
    horizontal_flip=False,
    vertical_flip=False,
    data_format="channels_last",
    validation_split=0.20,
    dtype=tf.float32,
)

train_generator = datagen.flow_from_directory(
    data_path + "data/mnist_subfolders/",
    target_size=(img_height, img_width),
    batch_size=batch_size,
    color_mode="grayscale",
    class_mode="sparse",
    shuffle=True,
    subset="training",
    seed=123,
)

# val_generator = datagen.flow_from_directory(
#     data_path + "data/mnist_subfolders/",
#     target_size=(img_height, img_width),
#     batch_size=batch_size,
#     color_mode="grayscale",
#     class_mode="sparse",
#     shuffle=True,
#     subset="validation",
#     seed=123,
# )

Found 40 images belonging to 10 classes.


In [13]:
# Redo model.compile to reset the optimizer states
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=[keras.losses.SparseCategoricalCrossentropy(from_logits=True),],
    metrics=["accuracy"],
)


In [None]:
# using model.fit (note steps_per_epoch)
# model.fit(
#     train_generator,
#     epochs=10,
#     steps_per_epoch=25,
#     verbose=2
   
# )


## images in csv

In [39]:
directory = data_path + "data/mnist_images_csv/"
import pandas as pd
df = pd.read_csv(directory + "train.csv")

file_paths = df["file_name"].values
labels = df["label"].values
ds_train = tf.data.Dataset.from_tensor_slices((file_paths, labels))


In [33]:
next(iter(ds_train))

(<tf.Tensor: shape=(), dtype=string, numpy=b'0_1.jpg'>,
 <tf.Tensor: shape=(), dtype=int64, numpy=0>)

In [40]:
def read_image(image_file, label):
    image = tf.io.read_file(directory + image_file)
    image = tf.image.decode_image(image, channels=1, dtype=tf.float32)
    return image, label


def augment(image, label):
    # data augmentation here
    return image, label


In [41]:
ds_train = ds_train.map(read_image).map(augment).batch(2)


In [42]:
model = keras.Sequential(
    [
        layers.Input((28, 28, 1)),
        layers.Conv2D(16, 3, padding="same"),
        layers.Conv2D(32, 3, padding="same"),
        layers.MaxPooling2D(),
        layers.Flatten(),
        layers.Dense(10),
    ]
)


In [43]:
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=[keras.losses.SparseCategoricalCrossentropy(from_logits=True),],
    metrics=["accuracy"],
)


In [44]:
model.fit(ds_train, epochs=1, verbose=2)

25/25 - 6s - loss: 2.5799 - accuracy: 0.0800 - 6s/epoch - 225ms/step


<keras.callbacks.History at 0x7f589d7bf490>

## in single folder

In [45]:
import pathlib  # pathlib is in standard library

batch_size = 2
img_height = 28
img_width = 28

directory = data_path + "data/mnist_images_only/"
ds_train = tf.data.Dataset.list_files(str(pathlib.Path(directory + "*.jpg")))


In [47]:
file_path = next(iter(ds_train))

In [51]:
def process_path(file_path):
    image = tf.io.read_file(file_path)
    image = tf.image.decode_jpeg(image, channels=1)
    label = tf.strings.split(file_path, "/")
    label = tf.strings.substr(label, pos=0, len=1)[-1]
    label = tf.strings.to_number(label, out_type=tf.int64)
    return image, label


In [52]:
ds_train = ds_train.map(process_path).batch(batch_size)


In [53]:
next(iter(ds_train))

(<tf.Tensor: shape=(2, 28, 28, 1), dtype=uint8, numpy=
 array([[[[0],
          [0],
          [0],
          ...,
          [0],
          [0],
          [0]],
 
         [[0],
          [0],
          [0],
          ...,
          [0],
          [0],
          [0]],
 
         [[0],
          [0],
          [0],
          ...,
          [0],
          [0],
          [0]],
 
         ...,
 
         [[0],
          [0],
          [0],
          ...,
          [0],
          [0],
          [0]],
 
         [[0],
          [0],
          [0],
          ...,
          [0],
          [0],
          [0]],
 
         [[0],
          [0],
          [0],
          ...,
          [0],
          [0],
          [0]]],
 
 
        [[[0],
          [0],
          [0],
          ...,
          [0],
          [0],
          [0]],
 
         [[0],
          [0],
          [0],
          ...,
          [0],
          [0],
          [0]],
 
         [[0],
          [0],
          [0],
          ...,
  

In [54]:
model = keras.Sequential(
    [
        layers.Input((28, 28, 1)),
        layers.Conv2D(16, 3, padding="same"),
        layers.Conv2D(32, 3, padding="same"),
        layers.MaxPooling2D(),
        layers.Flatten(),
        layers.Dense(10),
    ]
)


In [55]:
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=[keras.losses.SparseCategoricalCrossentropy(from_logits=True),],
    metrics=["accuracy"],
)


In [56]:
model.fit(ds_train, epochs=1, verbose=2)


25/25 - 5s - loss: 50.8953 - accuracy: 0.1400 - 5s/epoch - 203ms/step


<keras.callbacks.History at 0x7f589bd1b1f0>