In [6]:
# Imports
import os, warnings
import matplotlib.pyplot as plt
from matplotlib import gridspec

import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow import keras
from keras import layers
from keras.layers import Dense, Flatten, MaxPooling2D, PreprocessingLayer, Dropout


# Project 2 - Veggie Classification

For this assignment you'll need to classify some images of vegetables. 

## Parts

Please do two separate classifications:
<ol>
<li> Create a model from scratch. 
<li> Use transfer learning to use a pretrained model. 
</ol>

## Deliverables

Please submit your files into Moodle as follows:
<ul>
<li> A zipped .h5 model that has been trained. See the notes towards the end of the file. 
<li> Your .ipynb file. 
<li> A note (~1 to 2 paragraphs) in the comments of Moodle noting what you did to improve accuracy beyond just making a model. 
</ul>

If you save your model into a folder, the test file should work with your code. That is what I am going to run, with more and different test data. 

## Load Training and Validation Data

Please download the zip file from moodle and place it into your folder. If running on Colab you'll need to load it into the workspace. 

#### Colab and Files

You can load files into your Colab workspace via a drag and drop, however this file storage is temporary and will go away when you end your runtime. You can also use your Google drive to store it without having to load it multiple times - there are lots of guides online to setting this up. 

#### Temporary Files

If there are temporary files generated anywhere in the image folders you will need to remove them or you will probably get weird errors. For example, on a Mac (which I'm using to make this), there are temporary files that start with "._". If these appear for you, you can clear them via:
<ul>
<li> Mac: open a terminal at the top level of the image folder and run "dot_clean -n ."
<li> Windows: open a command prompt at the top level of the image folder and run "find . -name "._*" -exec rm '{}' \; -print"
</ul>

If this doesn't work, or if there are any other temporary files created in your file system you can remove them any other way - e.g. use search to find the files and remove them, sort by filetype and delete, etc... The commands above are just shortcuts. 

#### File Naming

Once things are unzipped ensure that the paths are correct and match your file paths. 

In [7]:
def bar_custom(current, total, width=80):
    print("Downloading: %d%% [%d / %d] bytes" % (current / total * 100, current, total))

In [8]:
import wget

print('Beginning file download with wget module')

wget.download(url, "train.zip", bar=bar_custom)

Beginning file download with wget module
Downloading: 0% [0 / 560031432] bytes
Downloading: 0% [8192 / 560031432] bytes
Downloading: 0% [16384 / 560031432] bytes
Downloading: 0% [24576 / 560031432] bytes
Downloading: 0% [32768 / 560031432] bytes
Downloading: 0% [40960 / 560031432] bytes
Downloading: 0% [49152 / 560031432] bytes
Downloading: 0% [57344 / 560031432] bytes
Downloading: 0% [65536 / 560031432] bytes
Downloading: 0% [73728 / 560031432] bytes
Downloading: 0% [81920 / 560031432] bytes
Downloading: 0% [90112 / 560031432] bytes
Downloading: 0% [98304 / 560031432] bytes
Downloading: 0% [106496 / 560031432] bytes
Downloading: 0% [114688 / 560031432] bytes
Downloading: 0% [122880 / 560031432] bytes
Downloading: 0% [131072 / 560031432] bytes
Downloading: 0% [139264 / 560031432] bytes
Downloading: 0% [147456 / 560031432] bytes
Downloading: 0% [155648 / 560031432] bytes
Downloading: 0% [163840 / 560031432] bytes
Downloading: 0% [172032 / 560031432] bytes
Downloading: 0% [180224 / 56003

'train (1).zip'

In [9]:
# UNZIP - Ensure that the filename is correct
import zipfile

zip_name = "train.zip"

with zipfile.ZipFile(zip_name, 'r') as zip_ref:
    zip_ref.extractall()

In [20]:
# Declare Parameters
batch_size = 64
BASE_EPOCHS = 3
MONITOR = "val_accuracy"
LOGS = "logs"
DIR_OUT = "kt_out"
PROJECT = "proj2_test"
VALIDATION = .2

stopping = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=5, mode="max")

### Data Processing

You probably want to use datsets for efficiency purposes. 

In [9]:
# Generate Datasets - you can change this if desired
# ENSURE FILE PATHS MATCH CORRECTLY
IMAGE_SIZE=(224,224)
train_dir='Vegetable Images/train'
val_dir='Vegetable Images/validation'

# Load training data
train_ds = image_dataset_from_directory(
    train_dir,
    label_mode='categorical',
    image_size = IMAGE_SIZE,
    shuffle=True,
    batch_size=batch_size
)

val_ds = image_dataset_from_directory(
    val_dir,
    label_mode='categorical',
    image_size = IMAGE_SIZE,
    shuffle=True,
    batch_size = batch_size
)

Found 15000 files belonging to 15 classes.
Found 3000 files belonging to 15 classes.


In [10]:
# Data Pipeline
def convert_to_float(image, label):
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    return image, label

AUTOTUNE = tf.data.experimental.AUTOTUNE
train_ds = (
    train_ds
    .map(convert_to_float)
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)
val_ds = (
    val_ds
    .map(convert_to_float)
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)

#### Training and Validation Data

After the file has been unzipped and loaded into datasets, you should see:

Found 20000 files belonging to 10 classes.
Found 4000 files belonging to 10 classes.

The first is the training dataset, the second is the validation dataset. 

In [1]:
import keras_tuner as kt
import datetime

2023-03-29 11:27:55.505362: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [33]:
def build_model(hp):
    model = keras.Sequential()
    model.add(layers.InputLayer(input_shape=(224, 224, 3)))
    for i in range(hp.Int("num_layers", 1, 3)):
        size1 = hp.Int("kernel_L"+str(i), min_value=32, max_value=512, step=64)
        model.add(layers.Conv2D(size1, (2, 2), activation='relu', padding="same", kernel_regularizer="l2"))
        model.add(layers.Conv2D(size1, (2, 2), activation='relu', padding="same", kernel_regularizer="l2"))
        model.add(layers.MaxPooling2D((2, 2)))
    
    model.add(Flatten())
    for i in range(hp.Int("num_dense", 1, 3)):
        size2 = hp.Int("dense_L"+str(i), min_value=32, max_value=512, step=64)
        model.add(layers.Dense(size2, activation="relu", kernel_regularizer="l2"))
    model.add(layers.Dense(15, activation="softmax"))
    
    model.compile(
        optimizer=keras.optimizers.Adam(),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

In [34]:
file_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tuner = kt.BayesianOptimization(build_model,
                        objective=MONITOR,
                        max_trials=8,
                        directory=DIR_OUT+"/"+file_time,
                        project_name=PROJECT,
                        overwrite=True)

In [35]:
# Get Results
tuner.search(train_ds, validation_data=val_ds, epochs=BASE_EPOCHS, callbacks=[stopping])

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]


Search: Running Trial #1

Value             |Best Value So Far |Hyperparameter
2                 |?                 |num_layers
160               |?                 |kernel_L0
3                 |?                 |num_dense
32                |?                 |dense_L0

Epoch 1/3


In [None]:
file_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=LOGS+"/kt_search/"+file_time, histogram_freq=1, write_images=False)

model = tuner.hypermodel.build(best_hps)
history = model.fit(train_ds, validation_data=val_ds, epochs=BASE_EPOCHS, validation_split=VALIDATION)

val_acc_per_epoch = history.history['val_accuracy']
best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1
print('Best epoch: %d' % (best_epoch,))

In [14]:
# Do Modelling Stuff

### Write the Weights

Write out the weights of whatever model you are going to finish up with. 

In [15]:
# Write weights

## PreTrained Model

Do the work for the pretrained model here. 
