In [1]:
import os
# Turn off info messages
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

import tensorflow as tf
from keras import models, layers, regularizers
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# --- Data preparation ---
NUM_PIXELS = 224                      # side length of the (square) images fed to the network
IMAGE_SIZE = (NUM_PIXELS,) * 2        # passed as `image_size` when loading the dataset
VALIDATION_SPLIT = 0.2                # fraction of the files held out for validation
SHUFFLING_SEED = 123                  # arbitrary; keeps the validation set stable between runs
BATCH_SIZE = 16
NUM_EPOCHS = 5

# --- CNN parameters ---
KERNEL_SIZE = (4, 4)                  # larger-than-typical kernel for large features in rice
POOL_SIZE = (4, 4)                    # larger pool size to save more memory

# --- Regularization (reduce overfitting) ---
DROPOUT_RATE = 0.2                    # rate of the Dropout layer before the output layer
LAMBDA_2 = 0.01                       # L2 penalty factor on the hidden dense layer's kernel

In [3]:
def build_dataset(data_dir, subset):
	"""Load one split of an image-folder dataset.

	Args:
		data_dir: Directory handed to `image_dataset_from_directory`.
		subset: Name of the split to load; this notebook uses "training"
			and "validation".

	Returns:
		The dataset built by `image_dataset_from_directory`, using
		`label_mode="categorical"`, images resized to `IMAGE_SIZE`, and
		a batch size of 1.
	"""
	loader_options = dict(
		validation_split=VALIDATION_SPLIT,
		subset=subset,
		label_mode="categorical",
		# Same seed for every call so the training/validation split is stable.
		seed=SHUFFLING_SEED,
		image_size=IMAGE_SIZE,
		# One image per element; callers re-batch to the size they need.
		batch_size=1,
	)
	return tf.keras.preprocessing.image_dataset_from_directory(data_dir, **loader_options)

In [4]:
def set_new_checkpoint_callback(checkpoint_dir):
	"""Create a weights-only `ModelCheckpoint` callback.

	Args:
		checkpoint_dir: Directory under which checkpoint files are written.

	Returns:
		A `tf.keras.callbacks.ModelCheckpoint` whose filepath is
		`<checkpoint_dir>/ckpt_{epoch}` and which saves weights only.
	"""
	# '{epoch}' is deliberately a plain-string placeholder (not an f-string);
	# it is passed through to ModelCheckpoint unformatted.
	return tf.keras.callbacks.ModelCheckpoint(
		filepath=os.path.join(checkpoint_dir, 'ckpt_{epoch}'),
		save_weights_only=True,
	)

In [5]:
def prepare_new_model(image_size, num_output_neurons):
	"""Build and compile the CNN image classifier.

	Args:
		image_size: Two-element sequence giving the spatial size of the input
			images; a channel dimension of 3 is appended to form the input shape.
		num_output_neurons: Number of units in the final softmax layer
			(the caller passes the number of classes).

	Returns:
		A compiled `Sequential` model (categorical cross-entropy loss,
		Adam optimizer, accuracy metric).
	"""
	# tuple() lets callers pass a list as well as a tuple.
	input_shape = tuple(image_size) + (3,)

	# Reduce overfitting by applying random transformations.
	data_augmentation = models.Sequential([
		layers.RandomFlip("horizontal_and_vertical"),
		layers.RandomRotation(0.2),
	])

	model = models.Sequential([
		# Declare the input shape at the head of the model. It used to be given
		# as `input_shape=` on the first Conv2D, but that layer is not the first
		# layer of the Sequential, so the argument had no effect and the model
		# stayed unbuilt until fit() was called.
		layers.Input(shape=input_shape),
		data_augmentation,
		# NOTE(review): there is no Rescaling/normalization layer, so images reach
		# the first convolution in whatever value range the dataset yields.
		# Confirm this is intended before relying on the trained weights.
		layers.Conv2D(32, kernel_size=KERNEL_SIZE, activation='relu'),
		layers.MaxPooling2D(pool_size=POOL_SIZE),
		layers.Conv2D(64, kernel_size=KERNEL_SIZE, activation='relu'),
		layers.MaxPooling2D(pool_size=POOL_SIZE),
		layers.Flatten(),
		layers.Dense(128, activation='relu', kernel_regularizer=regularizers.L2(LAMBDA_2)),
		layers.Dropout(DROPOUT_RATE),
		layers.Dense(num_output_neurons, activation='softmax')
	])

	model.compile(loss='categorical_crossentropy',
				optimizer='adam',
				metrics=['accuracy'])

	return model

In [6]:
# Fetch the rice image archive and extract it; the local path returned by
# get_file is shown as the cell output.
tf.keras.utils.get_file(
	origin='https://www.muratkoklu.com/datasets/vtdhnd09.php',
	extract=True,
)

Downloading data from https://www.muratkoklu.com/datasets/vtdhnd09.php


'/root/.keras/datasets/vtdhnd09.php'

In [7]:
# List the Keras datasets directory to check the name of the extracted folder.
!ls /root/.keras/datasets/

Rice_Image_Dataset  vtdhnd09.php


In [8]:
# Folder the archive was extracted into ('Rice_Image_Dataset': lowercase 's' in 'Dataset').
# Built from the home directory instead of a hard-coded "/root/..." so the notebook
# also works for a non-root user; when running as root it resolves to the same path.
data_dir = os.path.join(os.path.expanduser("~"), ".keras", "datasets", "Rice_Image_Dataset")

In [9]:
train_ds = build_dataset(data_dir, "training")

# Extract size and class names before re-batching. build_dataset loads one
# image per element, so cardinality() here is the number of training images.
train_ds_size = train_ds.cardinality().numpy()
class_names   = tuple(train_ds.class_names)

# Re-batch to the training batch size and repeat indefinitely (epoch length is
# set by steps_per_epoch in fit). prefetch() lets the input pipeline prepare
# upcoming batches while the current one is being trained on; it does not
# change the data produced.
train_ds = (
	train_ds.unbatch()
	.batch(BATCH_SIZE)
	.repeat()
	.prefetch(tf.data.AUTOTUNE)
)

val_ds = build_dataset(data_dir, "validation")
val_ds_size   = val_ds.cardinality().numpy()
# The validation set is re-batched but not repeated: it stays finite.
val_ds = (
	val_ds.unbatch()
	.batch(BATCH_SIZE)
	.prefetch(tf.data.AUTOTUNE)
)

Found 75000 files belonging to 5 classes.
Using 60000 files for training.
Found 75000 files belonging to 5 classes.
Using 15000 files for validation.


In [10]:
# One output neuron per class found in the training directory.
model = prepare_new_model(
	image_size=IMAGE_SIZE,
	num_output_neurons=len(class_names),
)

In [11]:
# The training dataset repeats indefinitely, so an epoch is defined as the
# number of whole batches that fit in the training set.
steps_per_epoch = train_ds_size // BATCH_SIZE

# The validation dataset is finite. Round up so the final, partial batch is
# evaluated as well; floor division skipped the last
# (val_ds_size % BATCH_SIZE) validation images on every epoch.
validation_steps = (val_ds_size + BATCH_SIZE - 1) // BATCH_SIZE

In [12]:
# Index of this training run; used as the suffix of the checkpoint directory name.
current_iteration = 2

In [13]:
# Each training iteration writes its checkpoints to its own directory.
# NOTE(review): the path is relative and looks like a Google Drive mount under
# the working directory; confirm Drive is mounted before this cell runs.
checkpoint_dir = f'drive/MyDrive/Colab Notebooks/RiceML/training_checkpoints_{current_iteration}'
checkpoint_callback = set_new_checkpoint_callback(checkpoint_dir=checkpoint_dir)

In [14]:
# Train for NUM_EPOCHS epochs with validation data, saving weights through the
# checkpoint callback. The returned History object is the cell's output.
model.fit(
	x=train_ds,
	validation_data=val_ds,
	epochs=NUM_EPOCHS,
	steps_per_epoch=steps_per_epoch,
	validation_steps=validation_steps,
	callbacks=[checkpoint_callback],
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f6a9c647340>