## Data Augmentation
This notebook builds and trains a bean disease image classifier that uses data augmentation techniques.

### Download the necessary data into a Colab instance
Lines starting with `!` are run as terminal (shell) commands rather than as Python code.

In [None]:
# First, make sure wget is installed (apt is told to answer "yes" to prompts via -y)
! apt install wget -y

# Download the training archive of the ibeans dataset to /tmp/train.zip.
# NOTE(review): --no-check-certificate turns off TLS certificate verification for
# this and the following downloads; confirm it is really needed, since the files
# are fetched over HTTPS and would otherwise be verified.
!wget --no-check-certificate \
    https://storage.googleapis.com/ibeans/train.zip \
    -O /tmp/train.zip

# Download the validation archive to /tmp/validation.zip
!wget --no-check-certificate \
    https://storage.googleapis.com/ibeans/validation.zip \
    -O /tmp/validation.zip

# Download the test archive to /tmp/test.zip
!wget --no-check-certificate \
    https://storage.googleapis.com/ibeans/test.zip \
    -O /tmp/test.zip

### Unzip dataset files

In [None]:
import os
import zipfile

# (archive path, extraction directory) for every downloaded dataset split.
# The train and validation archives are extracted straight into /tmp, while the
# test archive gets its own /tmp/test directory.
# NOTE(review): presumably train.zip and validation.zip already contain top-level
# train/ and validation/ folders (later cells read /tmp/train and /tmp/validation)
# — confirm the archive layout.
archives = (
  ('/tmp/train.zip', '/tmp'),
  ('/tmp/validation.zip', '/tmp'),
  ('/tmp/test.zip', '/tmp/test'),
)

for local_zip, extract_dir in archives:
  # The context manager closes every archive, even if extraction fails;
  # previously only the last opened archive was closed.
  with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

### Apply data augmentation to an existing dataset

In [None]:
# First, install and import TensorFlow
# NOTE(review): the install is not pinned to a version, so a re-run at a later
# date may pick up a different TensorFlow release.
!pip3 install tensorflow

# Best-effort request for TensorFlow 2.x: any error raised by the magic is
# deliberately ignored so the cell keeps going.
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  # Presumably reached when running outside Colab, where the magic is unavailable.
  pass
import tensorflow as tf

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Random transformations applied to the training images as they are loaded.
train_augmentation = {
  'rescale': 1./255,            # every pixel value is multiplied by 1/255
  'rotation_range': 40,         # random rotations, range given in degrees
  'width_shift_range': 0.2,     # random horizontal shift, up to 20% of the width
  'height_shift_range': 0.2,    # random vertical shift, up to 20% of the height
  'shear_range': 0.2,
  'zoom_range': 0.2,
  'horizontal_flip': True,
  'fill_mode': 'nearest',       # pixels uncovered by a transform copy the nearest pixel
}
train_datagen = ImageDataGenerator(**train_augmentation)

# Validation images are only rescaled; no augmentation is applied to them.
validate_datagen = ImageDataGenerator(rescale = 1./255)

### Define data flowers
Each data flower reads the images found in its directory and supplies them to the model in batches.

In [None]:
# Settings shared by the training and validation flows: images are resized to
# 224x224 and delivered in batches of 128.
flow_options = {
  'target_size': (224, 224),
  'batch_size': 128,
  #? Since we use `categorical_crossentropy` loss, we need `categorical` labels
  'class_mode': 'categorical',
}

# Training images come from /tmp/train and pass through the augmenting generator
train_data_flower = train_datagen.flow_from_directory('/tmp/train', **flow_options)

# Validation images come from /tmp/validation and pass through the rescale-only generator
validate_data_flower = validate_datagen.flow_from_directory('/tmp/validation', **flow_options)

### Define the CNN model

In [None]:
# CNN made of four Conv2D + MaxPooling2D stages followed by a dense classifier.
keras_layers = tf.keras.layers

# 1st convolution; it also declares the input shape: 224x224 images with 3 channels
conv_stages = [
  keras_layers.Conv2D(16, (3,3), activation='relu', input_shape=(224, 224, 3)),
  keras_layers.MaxPooling2D(2, 2),
]

# 2nd, 3rd and 4th convolutions: the number of filters doubles at every stage
for n_filters in (32, 64, 128):
  conv_stages.append(keras_layers.Conv2D(n_filters, (3,3), activation='relu'))
  conv_stages.append(keras_layers.MaxPooling2D(2,2))

classifier_head = [
  # Flatten the feature maps so they can be fed into the dense layers
  keras_layers.Flatten(),
  # Hidden layer with 512 neurons
  keras_layers.Dense(512, activation='relu'),
  # Output layer: softmax over 3 units
  keras_layers.Dense(3, activation='softmax'),
]

model = tf.keras.models.Sequential(conv_stages + classifier_head)

# Print a layer-by-layer summary of the finished model
model.summary()

In [None]:
# The loss and optimizer are choices; other combinations may suit other problems better.
# RMSprop is used here with a learning rate of 0.0001.
rmsprop_optimizer = tf.keras.optimizers.RMSprop(learning_rate = 0.0001)

model.compile(
  optimizer = rmsprop_optimizer,
  loss = 'categorical_crossentropy',
  metrics = ['accuracy']
)

### Train and evaluate the model

In [None]:
# Train for 8 epochs on the training flow, passing the validation flow as
# validation data; verbose = 1 turns on per-epoch progress output.
fit_options = {
  'epochs': 8,
  'verbose': 1,
  'validation_data': validate_data_flower,
}
model.fit(train_data_flower, **fit_options)