In [2]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'plant-disease-recognition-dataset:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F1447507%2F2394131%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240927%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240927T193255Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D353110a6aa8b8dc4f9858164eada952d1c311704ee79d5e45354500665f858ebe05e6cbfda579cd52f2206ce7b536bad6618458f5da269daa87443fe63a15d125b58b3c887e2d92993fb2d7173b2bb9431acde84b3317bf7f142122370b2a360b408f6915392b9409efee2913179067eb54039a927e4193738cc3a829e3b96e5d5a381a2dd7e0c00c7e481385608f7b57692b259efa2c0d4a9085c16d44d76b4b7b5944013f69379f8b18de1a617a3a6843d0a57eb3bd722b8782efc943af4dc5567d1b021fa9af04e05531cfba06b0db6cded8e2a5fee8977713d757af8f5802adb21c477603bb6880742a370c91f82409a2a5ab31843093524091d93342973'

KAGGLE_INPUT_PATH='kaggle/input'
KAGGLE_WORKING_PATH='kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    directory, download_url_encoded = data_source_mapping.split(':')
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = int(50 * dl / int(total_length))
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            if filename.endswith('.zip'):
              with ZipFile(tfile) as zfile:
                zfile.extractall(destination_path)
            else:
              with tarfile.open(tfile.name) as tarfile:
                tarfile.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Downloading plant-disease-recognition-dataset, 1339624018 bytes compressed
Downloaded and uncompressed: plant-disease-recognition-dataset
Data source import complete.


<div style="font-family: Calibri, serif; text-align: center;">
    <hr style="border: none;
               border-top: 15px solid orange;
               width: 100%;
               margin-bottom: 20px;
               margin-left: 45;
               height: 20%">
    <div style="font-size: 56px;"><b>🧠 Convolutional<br>Neural Network<br>From Scratch</b></div><br>
        <hr style="border: none;
               border-top: 15px solid orange;
               width: 100%;
               margin-bottom: 20px;
               margin-left: 45;
               height: 20%"> <br>
    <div style="font-weight: bold;
                text-transform: uppercase;
                margin-top: 20px;
                letter-spacing: 2.5px;
                ">2023 | <a href ="https://www.kaggle.com/lusfernandotorres/">© Luis Fernando Torres</a></div>
</div>

<div style="font-family: Calibri, serif; text-align: left;">
    <hr style="border: none;
               border-top: 2px solid orange;
               width: 100%;
               margin-top: 30px;
               margin-bottom: 20px;
               margin-left: 0;">
    <div style="font-size: 16px; letter-spacing: 1.5px;"><b>Table of Contents</b></div>
</div>

- [Introduction](#intro)<br><br>
    - [Convolutional Layer](#convolution)<br><br>
    - [Pooling Layer](#pooling)<br><br>
    - [Fully-connected Layer](#fc)<br><br>
- [CNNS and Computer Vision](#cv)<br><br>
- [This Notebook](#goal)<br><br>
- [Exploring the Data](#eda)<br><br>
- [Preprocessing](#preprocess)<br><br>
- [Data Augmentation](#augmentation)<br><br>
- [Building the Convolutional Neural Network](#build)<br><br>
- [Validating Performance](#val)<br><br>
- [Conclusion](#conclusion)

<div id = 'intro'
     style="font-family: Calibri, serif; text-align: left;">
    <hr style="border: none;
               border-top: 2.85px solid orange;
               width: 100%;
               margin-top: 62px;
               margin-bottom: auto;
               margin-left: 0;">
    <div style="font-size: 38px; letter-spacing: 2.25px;"><b>Introduction</b></div>
</div>

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;"><b><i>Convolutional Neural Networks (CNNs or ConvNets)</i></b> are specialized neural architectures that is predominantly used for several <b>computer vision</b> tasks, such as image classification and object recognition. These neural networks harness the power of <i>Linear Algebra</i>, specifically through convolution operations, to identify patterns within images.</p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">Convolutional neural networks have three main kinds of layers, which are:</p>
          
<div style = "margin-left: 25px;">
    
  <p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">• Convolutional layer</p>
          
  <p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">• Pooling layer</p>
          
  <p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">• Fully-connected layer</p></div>

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">The convolutional layer is the first layer of the network, while the fully-connected layer is the final layer, responsible for the output. The first convolutional layer may be followed by several additional convolutional layers or pooling layers; and with each new layer, the more complex is the CNN.</p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">As the CNN gets more complex, the more it excels in identifying greater portions of the image. Whereas earlier layers focus on the simple features, such as colors and edges; as the image progresses through the network, the CNN starts to recognize larger elements and shapes, until finally reaching its main goal. </p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">The image below displays the structure of a CNN. We have an input image, followed by Convolutional and Pooling layers, where the feature learning process happens. Later on, we have the layers responsible for the task of classifying whether the vehicle in the input data is a car, truck, van, bicycle, etc.</p>       

<center>
    <img src = "https://miro.medium.com/v2/resize:fit:1358/1*XbuW8WuRrAY5pC4t-9DZAQ.jpeg">
<p style = "font-size: 16px;
            font-family: 'Georgia', serif;
            text-align: center;
            margin-top: 10px;">Image displaying the structure of a Convolutional Neural Networks. <br> Source: <a href = "https://medium.com/@RaghavPrabhu/understanding-of-convolutional-neural-network-cnn-deep-learning-99760835f148">Understanding of Convolutional Neural Network (CNN) — Deep Learning</a></p>
</center>

<p id = 'convolution' style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;"><b>Convolutional Layer</b></p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">The convolutional layer is the most important layer of a CNN; responsible for dealing with the major computations. The convolutional layer includes <b>input data</b>, <b> a filter</b>, and <b>a feature map</b>.</p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">To illustrate how it works, let's assume we have a color image as input. This image is made up of a matrix of pixels in 3D, representing the three dimensions of the image: height, width, and depth.</p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">The filter—which is also referred to as kernel—is a two-dimensional array of weights, and is typically a $3\times3$ matrix. It is applied to a specific area of the image, and a <b>dot product</b> is computed between the input pixels and the weights in the filter. Subsequently, the filter shifts by a stride, and this whole process is repeated until the kernel slides through the entire image, resulting in an output array.</p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">The resulting output array is also known as a feature map, activation map, or convolved feature.</p>

<center>
    <img src = "https://miro.medium.com/v2/resize:fit:1358/1*D6iRfzDkz-sEzyjYoVZ73w.gif">
<p style = "font-size: 16px;
            font-family: 'Georgia', serif;
            text-align: center;
            margin-top: 10px;">GIF displaying the convolutional process. First, we have a $5\times5$ matrix—pixels in the input image—with a $3\times3$ filter. The result of the operation is the output array.<br> Source: <a href = "https://medium.datadriveninvestor.com/convolutional-neural-networks-3b241a5da51e">Convolutional Neural Networks</a></p>
</center>

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">It is important to note that the weights in the filter remain fixed as it moves across the image. The weights values are adjusted during the training process due to backpropagation and gradient descent.</p>
          
<p style="font-family: Calibri, serif; text-align: left;font-size: 24px; letter-spacing: .85px;">Besides the weights in the filter, we have other three important parameters that need to be set before the training begins:</p>
          
<div style = "margin-left: 25px;">
    
  <p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;"><b>• Number of Filters:</b> This parameter is responsible for defining the depth of the output. If we have three distinct filters, we have three different feature maps, creating a depth of three.</p>
          
  <p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;"><b>• Stride:</b> This is the distance, or number of pixels, that the filter moves over the input matrix.  </p>
          
  <p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;"><b>• Zero-padding:</b> This parameter is usually used when the filters do not fit the input image. This sets all elements outside the input matrix to zero, producing a larger or equally sized output. There are three different kinds of padding:</p>

<div style = "margin-left: 34px;">
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;"><b>■ Valid padding:</b> Also known as <i>no padding</i>. In this specific case, the last convolution is dropped if the dimensions do not align.</p>
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;"><b>■ Same padding:</b> This padding ensures that the output layer has the exact same size as the input layer.</p>
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;"><b>■ Full padding:</b> This kind of padding increases the size of the output by adding zeros to the borders of the input matrix.</p></div></div>

<p style="font-family: Calibri, serif; text-align: left;font-size: 24px; letter-spacing: .85px;">After each convolution operation, we have the application of a <i><b>Rectified Linear Unit (ReLU)</b></i> function, which transforms the feature map and introduces nonlinearity.</p>

<center>
    <img src = "https://www.researchgate.net/profile/Leo-Pauly/publication/319235847/figure/fig3/AS:537056121634820@1505055565670/ReLU-activation-function.png">
<p style = "font-size: 16px;
            font-family: 'Georgia', serif;
            text-align: center;
            margin-top: 10px;"><i>ReLU</i> activation function:<br> $f(u)$ = $\begin{cases} 0 & \text{if } u \leq 0\\ u & \text{if } u > 0 \end{cases}$<br> <br>Source: <a href = "https://www.researchgate.net/figure/ReLU-activation-function_fig3_319235847">ResearchGate</a></p>
</center>

<p style="font-family: Calibri, serif; text-align: left;font-size: 24px; letter-spacing: .85px;">As mentioned earlier, the initial convolutional layer can be followed by additional convolutional layers.</p>

<p style="font-family: Calibri, serif; text-align: left;font-size: 24px; letter-spacing: .85px;">The subsequent convolutional layers can see the pixels within the receptive fields of the prior layers, which helps to extract and interpret additional patterns.</p>

<p id = 'pooling' style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;"><b>Pooling Layer</b></p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">The pooling layer is responsible for reducing the dimensionality of the input. It also slides a filter across the entire input—without any weights—to populate the output array. We have two main types of pooling:</p>
          
<div style = "margin-left: 25px;">
    
  <p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;"><b>• Max Pooling:</b> As the filter slides through the input, it selects the pixel with the highest value for the output array.</p>

  <p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;"><b>• Average Pooling:</b> The value selected for the output is obtained by computing the average within the receptive field.</p>
</div>

<center>
    <img src = "https://www.researchgate.net/profile/Imran-Ali-12/publication/340812216/figure/fig4/AS:928590380138496@1598404607456/Pooling-layer-operation-oproaches-1-Pooling-layers-For-the-function-of-decreasing-the.png">
<p style = "font-size: 16px;
            font-family: 'Georgia', serif;
            text-align: center;
            margin-top: 10px;">Illustration of the pooling process.<br>Source: <a href = "https://www.researchgate.net/figure/Pooling-layer-operation-oproaches-1-Pooling-layers-For-the-function-of-decreasing-the_fig4_340812216">ResearchGate</a></p>
</center>

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">The pooling layer serves the purpose of reducing complexity, improving efficiency, and limiting the risk of overfitting.</p>

<p id = 'fc' style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;"><b>Fully-connected Layer</b></p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">This is the layer responsible for performing the task classification based on the features extracted during the previous layers. While both convolutional and pooling layers tend to use $ReLU$ functions, fully-connected layers use the <i><b>Softmax</b></i> activation function for classification, producing a probability from <i>0</i> to <i>1</i>.

<center>
    <img src = "https://www.researchgate.net/profile/Binghui-Chen/publication/319121953/figure/fig2/AS:527474636398592@1502771161390/Softmax-activation-function.png">
<p style = "font-size: 16px;
            font-family: 'Georgia', serif;
            text-align: center;
            margin-top: 10px;"><i>Softmax</i> activation function graph.<br>Source: <a href = "https://www.researchgate.net/figure/Softmax-activation-function_fig2_319121953">ResearchGate</a></p>
</center>

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">
\begin{equation}
    \sigma(z_i) = \frac{e^{z_{i}}}{\sum_{j=1}^K e^{z_{j}}} \ \ \
    \end{equation}</p>

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">
Where:</p>

- <p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">
$\sigma{(z_i)}$ = The softmax function applied to the $i^{th}$ element of the input vector. This value ranges between 0 and 1.</p>

- <p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">
$e^{z_i}$ = The exponential function applied to the $i^{th}$ element of the input vector.</p>

- <p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">
$\sum_{j=1}^K e^{z_{j}}$ = The sum of the exponential of each element in the input vector from to $K$, where $K$ is the total number of classes/labels.</p>

<div id = 'cv'
     style="font-family: Calibri, serif; text-align: left;">
    <hr style="border: none;
               border-top: 2.85px solid orange;
               width: 100%;
               margin-top: 62px;
               margin-bottom: auto;
               margin-left: 0;">
    <div style="font-size: 38px; letter-spacing: 2.25px;"><b><i>CNNs</i> and Computer Vision</b></div>
</div>

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">Due to its power in image recognition tasks, CNNs have been highly effective in many fields related to <i><b>Computer Vision</b></i>.</p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">Computer Vision is a field of AI that enables computers to extract information from digital images, videos, and other visual inputs. Some common applications of computer vision today can be seen across several industries, including the following:</p>
          
<div style = "margin-left: 25px;">
    
  <p style="font-family: Calibri, serif; text-align: left;
    font-size: 24px; letter-spacing: .85px;"><b>• Social Media:</b> <i>Google</i>, <i>Meta</i>, and <i>Apple</i> use these systems to identify people in a photograph, making it easier to organize photo albums and tag friends.</p>

  <p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;"><b>• Healthcare:</b> Computer vision models have been used to help doctors identifying cancerous tumors in patients, as well as other conditions.</p>
    
  <p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;"><b>• Agriculture:</b> Drones equipped with cameras can monitor the health of vast farmlands to identify areas that need more water or fertilizers.</p>    
    
  <p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;"><b>• Security:</b> Surveillance systems can detect unusual and suspect activities  in real time.</p>     
    
  <p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;"><b>• Finance:</b> Computer vision models may be used to identify relevant patterns in candlestick charts to predict price movements.</p>        

  <p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;"><b>• Automotive:</b> Computer vision is an essential component of the research leading to self-driving cars.</p>   
    
</div>

<div id = 'goal'
     style="font-family: Calibri, serif; text-align: left;">
    <hr style="border: none;
               border-top: 2.85px solid orange;
               width: 100%;
               margin-top: 62px;
               margin-bottom: auto;
               margin-left: 0;">
    <div style="font-size: 38px; letter-spacing: 2.25px;"><b>This Notebook</b></div>
</div>

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">Nowadays, there are several <i>pre-trained</i> CNNs available for many tasks. Models like <i><b>ResNet</b></i>, <i><b>VGG16</b></i>, <i><b>InceptionV3</b></i>, as well as many others, are highly efficient in most computer vision tasks we currently perform across industries.</p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">In this notebook, however, I would like to explore the process of building a simple, yet effective, Convolutional Neural Network from scratch. For this task, I will use <b><i>Keras</i></b> to help us build a neural network that can accurately identify diseases in a plant through images.</p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">I am going to use the <a href ="https://www.kaggle.com/datasets/rashikrahmanpritom/plant-disease-recognition-dataset/">Plant Disease Recognition Dataset</a>, which contains 1,530 images divided into train, test, and validation sets. The images are labeled as <i>“Healthy“</i>, <i>“Rust“</i>, and <i>“Powdery“</i> to describe the conditions of the plants.</p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">Very briefly, each class means the following:</p>
          
<div style = "margin-left: 25px;">
    
  <p style="font-family: Calibri, serif; text-align: left;
    font-size: 24px; letter-spacing: .85px;"><b>• Rust:</b> These are plant diseases caused by Pucciniales fungi, which cause severe deformities to the plant.</p>
    
  <p style="font-family: Calibri, serif; text-align: left;
    font-size: 24px; letter-spacing: .85px;"><b>• Powdery:</b> Powdery mildews are caused by Erysphales fungi, posing a threat to agriculture and horticulture by reducing crop yields.</p>
    
  <p style="font-family: Calibri, serif; text-align: left;
    font-size: 24px; letter-spacing: .85px;"><b>• Healthy:</b> Naturally, these are the plants that are free from diseases.</p>
</div>

In [9]:
# Importing Libraries

# Data Handling
import pandas as pd
import numpy as np
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor

# Efficient Looping
import itertools

# Traceback for diagnosis
import traceback

# Data Visualization
import plotly.express as px
import plotly.graph_objs as go
import plotly.subplots as sp
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import plotly.io as pio
from IPython.display import display
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)

# Statistics & Mathematics
import scipy.stats as stats
import statsmodels.api as sm
from scipy.stats import shapiro, skew, anderson, kstest
import math

# Feature Selection
from sklearn.feature_selection import (
    RFECV, SelectKBest, chi2, f_classif, f_regression,
    mutual_info_classif, mutual_info_regression
)

# Machine Learning Pipeline
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.base import BaseEstimator, TransformerMixin,ClassifierMixin

# Preprocessing data
from sklearn.preprocessing import RobustScaler, StandardScaler, QuantileTransformer, FunctionTransformer

from sklearn.compose import ColumnTransformer
from sklearn.base import BaseEstimator, TransformerMixin

# Model Selection for Cross Validation
from sklearn.model_selection import (
    StratifiedKFold, KFold,
    RepeatedKFold, RepeatedStratifiedKFold,
    train_test_split, TimeSeriesSplit
)

# Machine Learning metrics
from sklearn.metrics import (
    mean_squared_error,
    r2_score,
    mean_absolute_error,
    cohen_kappa_score,
    make_scorer,
    roc_curve,
    auc,
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
    confusion_matrix
)

# ML regressors
from sklearn.linear_model import HuberRegressor,RANSACRegressor, TheilSenRegressor, Ridge, Lasso, ElasticNet
from sklearn.svm import SVR, NuSVR, LinearSVR
from sklearn.ensemble import (
    HistGradientBoostingRegressor, StackingRegressor,
    AdaBoostRegressor, RandomForestRegressor, ExtraTreesRegressor,
    GradientBoostingRegressor, StackingRegressor, VotingRegressor
    )
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor


# ML classifiers
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.svm import SVC, NuSVC, LinearSVC
from sklearn.ensemble import (
    HistGradientBoostingClassifier, AdaBoostClassifier,
    RandomForestClassifier, GradientBoostingClassifier,
    StackingClassifier, VotingClassifier,ExtraTreesClassifier
    )
from sklearn.tree import DecisionTreeClassifier
from catboost import CatBoostClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

# Clustering algorithms
from sklearn.cluster import KMeans

# Fine-tuning
import optuna

# Randomizer
import random

# Encoder of categorical variables
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder

# OS
import os

# Image package
from PIL import Image

# Hiding warnings
import warnings
warnings.filterwarnings("ignore")

In [10]:
# Importing Keras
from keras.models import Sequential                          # Neural network model as a sequence of layers.
from keras.layers import Conv2D                              # Convolutional layer
from keras.layers import MaxPooling2D                        # Max pooling layer
from keras.layers import Flatten                             # Layer used to flatten 2D arrays for fully-connected layers.
from keras.layers import Dense                               # This layer adds fully-connected layers to the neural network.
from keras.layers import Dropout                             # This serves to prevent overfitting by dropping out a random set of activations.
from keras.layers import BatchNormalization                  # This is used to normalize the activations of the neurons.
from keras.layers import Activation                          # Layer for activation functions
from keras.callbacks import EarlyStopping, ModelCheckpoint   # Classes used to save weights and stop training when improvements reach a limit
from keras.models import load_model                          # This helps us to load trained models
# Preprocessing layers
from keras.layers import Rescaling                           # This layer rescales pixel values

# Importing TensorFlow
import tensorflow as tf

In [11]:
# Configuring notebook
seed = 123
paper_color = '#EEF6FF'
bg_color = '#EEF6FF'
#colormap =
#template =

In [12]:
def image_resizer(paths):
    """
    This function resizes the input images
    """
    with ThreadPoolExecutor() as executor:
        resized_images = list(executor.map(lambda x: Image.open(x).resize((350,250)), paths))
    return resized_images

def plot_images_list(images, title, subtitle):
    '''
    This functions helps to plot a matrix of images in a list
    '''
    fig = sp.make_subplots(rows=3, cols=3)
    images = image_resizer(images)

    traces = []
    for i in range(min(9, len(images))):
        img = go.Image(z=images[i])
        traces.append((img, i//3+1, i%3+1))

    fig.add_traces([trace[0] for trace in traces],
                  rows = [trace[1] for trace in traces],
                  cols = [trace[2] for trace in traces])

    fig.update_layout(
        title={'text': f'<b>{title}<br>  <i><sub>{subtitle}</sub></i></b>',
               'font': dict(size = 22)},
        height=800,
        width=800,
        margin=dict(t=110, l=80),
        plot_bgcolor=bg_color,paper_bgcolor=paper_color
        #template=template
    )
    fig.show()

In [13]:
# Configuring GPU
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
        strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
        print('\nGPU Found! Using GPU...')
    except RuntimeError as e:
        print(e)
else:
    strategy = tf.distribute.get_strategy()
    print('Number of replicas:', strategy.num_replicas_in_sync)


GPU Found! Using GPU...


2024-09-27 14:44:09.243632: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Ultra
2024-09-27 14:44:09.243698: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 128.00 GB
2024-09-27 14:44:09.243761: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 48.00 GB
2024-09-27 14:44:09.243829: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-09-27 14:44:09.243876: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


<div id = 'eda'
     style="font-family: Calibri, serif; text-align: left;">
    <hr style="border: none;
               border-top: 2.85px solid orange;
               width: 100%;
               margin-top: 62px;
               margin-bottom: auto;
               margin-left: 0;">
    <div style="font-size: 38px; letter-spacing: 2.25px;"><b>Exploring the Data</b></div>
</div>

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">Before building our Convolutional Neural Network, it is helpful to perform a brief, yet efficient, analysis of the data we have at hand. Let's start by loading the directories for each set.</p>

In [16]:
# Loading training, testing, and validation directories
train_dir = 'kaggle/input/plant-disease-recognition-dataset/Train/Train'
test_dir = 'kaggle/input/plant-disease-recognition-dataset/Test/Test'
val_dir = 'kaggle/input/plant-disease-recognition-dataset/Validation/Validation'

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">We may also count the files inside each subfolder to compute the total of data we have for training and testing, as well as measure the degree of class imbalance.</p>

In [17]:
# Giving names to each directory
directories = {
    train_dir: 'Train',
    test_dir: 'Test',
    val_dir: 'Validation'
    }

# Naming subfolders
subfolders = ['Healthy', 'Powdery', 'Rust']

print('\n* * * * * Number of files in each folder * * * * *\n')

# Counting the total of pictures inside each subfolder and directory
for dir, name in directories.items():
    total = 0
    for sub in subfolders:
        path = os.path.join(dir, sub)
        num_files = len([f for f in os.listdir(path) if os.path.join(path, f)])
        total += num_files
        print(f'\n{name}/{sub}: {num_files}')
    print(f'\n  Total: {total}')
    print("-" * 80)


* * * * * Number of files in each folder * * * * *


Train/Healthy: 458

Train/Powdery: 430

Train/Rust: 434

  Total: 1322
--------------------------------------------------------------------------------

Test/Healthy: 50

Test/Powdery: 50

Test/Rust: 50

  Total: 150
--------------------------------------------------------------------------------

Validation/Healthy: 20

Validation/Powdery: 20

Validation/Rust: 20

  Total: 60
--------------------------------------------------------------------------------


<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">We have a total of <b>1,322</b> files inside the <code>Train</code> directory and there are no large imbalances between classes. A small variation between them is fine, and a simple metric such as <i>Accuracy</i> may be enough to measure performance.</p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">For the testing set, we have a total of <b>150</b> images, whereas the validation set consists of <b>60</b> images in total. Both sets have a perfect class balance.</p>

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">Convolutional Neural Networks require a fixed size for all images we feed into it. This means that every single image in our dataset must be equally sized, either $128 \times 128$, $224 \times 224$, and so on.</p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">We can also check if our data meets this requirement, or if it will be necessary to perform some preprocessing in this regard before modeling.</p>          

In [18]:
unique_dimensions = set()

for dir, name in directories.items():
    for sub in subfolders:
        folder_path = os.path.join(dir, sub)

        for file in os.listdir(folder_path):
            image_path = os.path.join(folder_path, file)
            with Image.open(image_path) as img:
                unique_dimensions.add(img.size)

if len(unique_dimensions) == 1:
    print(f"\nAll images have the same dimensions: {unique_dimensions.pop()}")
else:
    print(f"\nFound {len(unique_dimensions)} unique image dimensions: {unique_dimensions}")


Found 8 unique image dimensions: {(4032, 3024), (4000, 2672), (4000, 3000), (5184, 3456), (2592, 1728), (3901, 2607), (4608, 3456), (2421, 2279)}


<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">We have 8 different dimensions across the dataset. In the next cell, I am going to check the distribution of these dimensions across the data.</p>         

In [19]:
# Checking if all the images in the dataset have the same dimensions
dims_counts = defaultdict(int)

for dir, name in directories.items():
    for sub in subfolders:
        folder_path = os.path.join(dir, sub)

        for file in os.listdir(folder_path):
            image_path = os.path.join(folder_path, file)
            with Image.open(image_path) as img:
                dims_counts[img.size] += 1

for dimension, count in dims_counts.items():
    print(f"\nDimension {dimension}: {count} images")


Dimension (4000, 2672): 1130 images

Dimension (4000, 3000): 88 images

Dimension (2421, 2279): 1 images

Dimension (4608, 3456): 72 images

Dimension (2592, 1728): 127 images

Dimension (5184, 3456): 97 images

Dimension (4032, 3024): 16 images

Dimension (3901, 2607): 1 images


<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">It seems that most images have dimensions of $4000 \times 2672$, which is a <b>rectangular shape</b>. We can conclude that, due to the differences in dimensions, we will need to apply some preprocessing to the data.</p>         
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">First, we are going to resize the images, so they all have the same shape. Then, we will transform the input from rectangular shape to <b>square</b> shape.</p>         

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">Another crucial consideration is verifying the pixel valye range of the images. In this case, all images should have pixel values spanning from <b>0</b> to <b>255</b>. This consistency simplifies the preprocessing step, since we often normalize pixel values in images to a range going from 0 to 1.</p>         

In [20]:
# Checking images dtype
all_uint8 = True
all_in_range = True

for dir, name in directories.items():
    for sub in subfolders:
        folder_path = os.path.join(dir, sub)

        for file in os.listdir(folder_path):
            image_path = os.path.join(folder_path, file)
            with Image.open(image_path) as img:
                img_array = np.array(img)

            if img_array.dtype == 'uint8':
                all_uint8 = False

            if img_array.min() < 0 or img_array.max() > 255:
                all_in_range = False

if all_uint8:
    print(" - All images are of data type uint8\n")
else:
    print(" - Not all images are of data type uint8\n")

if all_in_range:
    print(" - All images have pixel values ranging from 0 to 255")
else:
    print(" - Not all images have the same pixel values from 0 to 255")

 - Not all images are of data type uint8

 - All images have pixel values ranging from 0 to 255


<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">Even though not all images are of the same data type, <code>uint8</code>, it is fairly easy to guarantee that they will have the same data type once we load images into datasets. We confirmed, though, that all the images have pixel values ranging from 0 to 255, which is great news.</p>         
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">Before moving on to the <i>Preprocessing</i> step, let's plot some images from each class to see what they look like.</p>         

In [32]:
# Loading the directory for each class in the training dataset
train_healthy_dir = train_dir + "/" + 'Healthy'
train_rust_dir = train_dir + "/" + 'Rust'
train_powdery_dir = train_dir + "/" + 'Powdery'

# Selecting 9 random pictures from each directory
healthy_files = random.sample(os.listdir(train_healthy_dir), 9)
rust_files = random.sample(os.listdir(train_rust_dir), 9)
powdery_files = random.sample(os.listdir(train_powdery_dir), 9)

In [34]:
# Plotting healthy plants
healthy_images = [os.path.join(train_healthy_dir, f) for f in healthy_files]
plot_images_list(healthy_images, "Healthy Plants", "Training Dataset")

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [None]:
# Plotting rust plants
rust_images = [os.path.join(train_rust_dir, f) for f in rust_files]
plot_images_list(rust_images, "Rust Plants", "Training Dataset")

In [None]:
# Plotting powdery plants
powdery_images = [os.path.join(train_powdery_dir, f) for f in powdery_files]
plot_images_list(powdery_images, "Powdery Plants", "Training Dataset")

<div id = 'preprocess'
     style="font-family: Calibri, serif; text-align: left;">
    <hr style="border: none;
               border-top: 2.85px solid orange;
               width: 100%;
               margin-top: 62px;
               margin-bottom: auto;
               margin-left: 0;">
    <div style="font-size: 38px; letter-spacing: 2.25px;"><b>Preprocessing</b></div>
</div>

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">For those familiar with tabular data, preprocessing is probably one of the most daunting steps of dealing with neural networks and unstructured data.</p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">This task can be fairly easy by using TensorFlow's <code>image_dataset_from_directory</code>, which loads images from the directories as a <b>TensorFlow Dataset</b>. This resulting dataset can be manipulated for batching, shuffling, augmentating, and several other preprocessing steps. </p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">I suggest you check <a href = "https://www.tensorflow.org/api_docs/python/tf/keras/utils/image_dataset_from_directory">this link</a> for more information on the <code>image_dataset_from_directory</code> function.</p>

In [35]:
# Creating a Dataset for the Training data
train = tf.keras.utils.image_dataset_from_directory(
    train_dir,  # Directory where the Training images are located
    labels = 'inferred', # Classes will be inferred according to the structure of the directory
    label_mode = 'categorical',
    class_names = ['Healthy', 'Powdery', 'Rust'],
    batch_size = 16,    # Number of processed samples before updating the model's weights
    image_size = (256, 256), # Defining a fixed dimension for all images
    shuffle = True,  # Shuffling data
    seed = seed,  # Random seed for shuffling and transformations
    validation_split = 0, # We don't need to create a validation set from the training set
    crop_to_aspect_ratio = True # Resize images without aspect ratio distortion
)

Found 1322 files belonging to 3 classes.


In [36]:
# Creating a dataset for the Test data
test = tf.keras.utils.image_dataset_from_directory(
    test_dir,
    labels = 'inferred',
    label_mode = 'categorical',
    class_names = ['Healthy', 'Powdery', 'Rust'],
    batch_size = 16,
    image_size = (256, 256),
    shuffle = True,
    seed = seed,
    validation_split = 0,
    crop_to_aspect_ratio = True
)

Found 150 files belonging to 3 classes.


In [37]:
# Creating a dataset for the Test data
validation = tf.keras.utils.image_dataset_from_directory(
    val_dir,
    labels = 'inferred',
    label_mode = 'categorical',
    class_names = ['Healthy', 'Powdery', 'Rust'],
    batch_size = 16,
    image_size = (256, 256),
    shuffle = True,
    seed = seed,
    validation_split = 0,
    crop_to_aspect_ratio = True
)

Found 60 files belonging to 3 classes.


<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">We have successfully captured all files within each set for each of the three classes. We can also print these datasets for a further understanding of their structure.</p>

In [38]:
print('\nTraining Dataset:', train)
print('\nTesting Dataset:', test)
print('\nValidation Dataset:', validation)


Training Dataset: <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 256, 256, 3), dtype=tf.uint8, name=None), TensorSpec(shape=(None, 3), dtype=tf.float32, name=None))>

Testing Dataset: <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 256, 256, 3), dtype=tf.uint8, name=None), TensorSpec(shape=(None, 3), dtype=tf.float32, name=None))>

Validation Dataset: <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 256, 256, 3), dtype=tf.uint8, name=None), TensorSpec(shape=(None, 3), dtype=tf.float32, name=None))>


<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">Let's explore a bit deeper what all the information above means.</p>
          
<div style = "margin-left: 25px;">
    
  <p style="font-family: Calibri, serif; text-align: left;
    font-size: 24px; letter-spacing: .85px;"><b>• _BatchDataset:</b> It indicates that the dataset returns data in batches.</p>
    
   <p style="font-family: Calibri, serif; text-align: left;
    font-size: 24px; letter-spacing: .85px;"><b>• element_spec:</b> This describes the structure of the elements in the dataset.</p>

  <p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;"><b>• TensorSpec(shape=(None, 256, 256, 3), dtype=tf.float32, name = None):</b> This represents the features, in this case the images, in the dataset. <code>None</code> represents the batch size, which is <i>None</i> here because it can vary depending on how many samples we have in the last batch; <code>256, 256</code> represents the height and width of the images; <code>3</code> is the number of channels in the images, indicating they are RGB images. Last, <code>dtype=tf.float32</code> tells us that the data type of the image pixels is a 32-bit floating point.</p>
    
  <p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;"><b>• TensorSpec(shape=(None, 3), dtype=tf.float32, name=None):</b> This represents the labels/targets of our dataset. Here, <code>None</code> refers to the batch size; <code>3</code> refers to the number of labels in the dataset; whilst <code>dtype=tf.float32</code> is also a 32-bit floating point.</p>
</div>

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">By using the <code>image_dataset_from_directory</code> function, we have been able to automatically preprocess some aspects of the data. For instance, all the images are now of the same data type, <code>tf.float32</code>. By setting <code>image_size = (256, 256)</code>, we have ensured that all images have the same dimensions, $256 \times 256$.</p>

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">Another important step for preprocessing is ensuring that the pixel values of our images are within a 0 to 1 range. The <code>image_dataset_from_directory</code> method performed some transformations already, but the pixel values are still in the 0 to 255 range.</p>

In [39]:
# Checking minimum and maximum pixel values in the Validation dataset
min_value = float('inf')
max_value = -float('inf')

for img, label in validation:
    batch_min = tf.reduce_min(img)
    batch_max = tf.reduce_max(img)

    min_value = min(min_value, batch_min.numpy())
    max_value = max(max_value, batch_max.numpy())

print('\nMinimum pixel value in the Validation dataset', min_value)
print('\nMaximum pixel value in the Validation dataset', max_value)


Minimum pixel value in the Validation dataset 0

Maximum pixel value in the Validation dataset 255


2024-09-27 14:49:57.670300: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">To bring the pixel values to the 0 to 1 range, we can easily use one of Keras' preprocessing layers, <code>tf.keras.layers.Rescaling</code>.</p>

In [40]:
scaler = Rescaling(1./255) # Defining scaler values between 0 to 1

In [41]:
# Rescaling datasets
train = train.map(lambda x, y: (scaler(x), y))
test = test.map(lambda x, y: (scaler(x), y))
validation = validation.map(lambda x, y: (scaler(x), y))

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">Now we can once more visualize the minimum and maximum pixel values in the validation set.</p>

In [42]:
# Checking minimum and maximum pixel values in the Validation dataset
min_value = float('inf')
max_value = -float('inf')

for img, label in validation:
    batch_min = tf.reduce_min(img)
    batch_max = tf.reduce_max(img)

    min_value = min(min_value, batch_min.numpy())
    max_value = max(max_value, batch_max.numpy())

print('\nMinimum pixel value in the Validation dataset', min_value)
print('\nMaximum pixel value in the Validation dataset', max_value)


Minimum pixel value in the Validation dataset 0.0

Maximum pixel value in the Validation dataset 1.0


2024-09-27 14:50:25.045885: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


<div id = 'augmentation'
     style="font-family: Calibri, serif; text-align: left;">
    <hr style="border: none;
               border-top: 2.85px solid orange;
               width: 100%;
               margin-top: 62px;
               margin-bottom: auto;
               margin-left: 0;">
    <div style="font-size: 38px; letter-spacing: 2.25px;"><b>Data Augmentation</b></div>
</div>

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">When working with image data, it is usually a good practice to artificially introduce some diversity to the sample by applying random transformations to the images used in training. This is good because it helps to expose the model to a wider variety of images and avoids overfitting.</p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">Keras has about seven different layers for image data augmentation. These are:</p>

<div style = "margin-left: 25px;">
    
  <p style="font-family: Calibri, serif; text-align: left;
    font-size: 24px; letter-spacing: .85px;"><b>• <a href ="https://keras.io/api/layers/preprocessing_layers/image_augmentation/random_crop#randomcrop-class">tf.keras.layers.RandomCrop</a></b>: This layer randomly chooses a location to crop images down to a target size. </p>
    
  <p style="font-family: Calibri, serif; text-align: left;
    font-size: 24px; letter-spacing: .85px;"><b>• <a href ="https://keras.io/api/layers/preprocessing_layers/image_augmentation/random_flip/">tf.keras.layers.RandomFlip</a></b>: This layer randomly flips images horizontally and or vertically based on the <code>mode</code> attribute.</p>
    
  <p style="font-family: Calibri, serif; text-align: left;
    font-size: 24px; letter-spacing: .85px;"><b>• <a href ="https://keras.io/api/layers/preprocessing_layers/image_augmentation/random_translation/">tf.keras.layers.RandomTranslation</a></b>: This layer randomly applies translations to each image during training according to the <code>fill_mode</code> attribute.</p>
    
  <p style="font-family: Calibri, serif; text-align: left;
    font-size: 24px; letter-spacing: .85px;"><b>• <a href ="https://keras.io/api/layers/preprocessing_layers/image_augmentation/random_brightness/">tf.keras.layers.RandomBrightness</a></b>: This layer randomly increases/reduces the brightness for the input RGB images. </p>
    
  <p style="font-family: Calibri, serif; text-align: left;
    font-size: 24px; letter-spacing: .85px;"><b>• <a href ="https://keras.io/api/layers/preprocessing_layers/image_augmentation/random_rotation/">tf.keras.layers.RandomRotation</a></b>: This layer randomly rotates the images during training, and also fills empty spaces according to the <code>fill_mode</code> attribute. </p>
    
  <p style="font-family: Calibri, serif; text-align: left;
    font-size: 24px; letter-spacing: .85px;"><b>• <a href ="https://keras.io/api/layers/preprocessing_layers/image_augmentation/random_zoom/">tf.keras.layers.RandomZoom</a></b>: This layer randomly zooms in or out on each axis of each image independently during training. </p>
    
  <p style="font-family: Calibri, serif; text-align: left;
    font-size: 24px; letter-spacing: .85px;"><b>• <a href ="https://keras.io/api/layers/preprocessing_layers/image_augmentation/random_contrast/">tf.keras.layers.RandomContrast</a></b>: This layer randomly adjusts contrast by a random factor during training in or out on each axis of each image independently during training. </p>
</div>

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">For this task, I am going to apply <code>RandomRotation</code>, <code>RandomContrast</code>, as well as <code>RandomBrightness</code> to our images.</p>

In [43]:
# Creating data augmentation pipeline
augmentation = tf.keras.Sequential(
    [
        tf.keras.layers.RandomRotation(
        factor = (-.25, .3),
        fill_mode = 'reflect',
        interpolation = 'bilinear',
        seed = seed),


        tf.keras.layers.RandomBrightness(
        factor = (-.45, .45),
        value_range = (0.0, 1.0),
        seed = seed),

        tf.keras.layers.RandomContrast(
        factor = (.5),
        seed = seed)
    ]
)

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">We can also use an <code>input_shape</code> as example to build the pipeline above and plot it below to illustrate how it looks.</p>

In [45]:
augmentation.build((None, 256, 256, 3)) # Building model
# Plotting model
tf.keras.utils.plot_model(augmentation,
                          show_shapes = True,
                          show_layer_names = True,
                          expand_nested = True)

You must install pydot (`pip install pydot`) for `plot_model` to work.


<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">We are going to attach this data augmentation pipeline to our convolutional neural network. It is important to remember that the data augmentation pipeline is inactive during testing, and the input samples will only be augmented during <code>fit()</code>, not when calling <code>predict()</code>. </p>

<div id = 'build'
     style="font-family: Calibri, serif; text-align: left;">
    <hr style="border: none;
               border-top: 2.85px solid orange;
               width: 100%;
               margin-top: 62px;
               margin-bottom: auto;
               margin-left: 0;">
    <div style="font-size: 38px; letter-spacing: 2.25px;"><b>Building the Convolutional Neural Network</b></div>
</div>

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">To build the Convolutional Neural Network with Keras, we are going to use the <code>Sequential</code> class. This class allows us to build a linear stack of layers, which is essential for the creation of neural networks.</p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">Besides the Convolutional, Pooling, and Fully-Connected Layers, which we have previously explored, I am also going to add the following layers to the network:</p>

<div style = "margin-left: 25px;">
    
  <p style="font-family: Calibri, serif; text-align: left;
    font-size: 24px; letter-spacing: .85px;"><b>• BatchNormalization</b>: This layer applies a transformation that maintains the mean output close to $0$ and the standard deviation close to $1$. It normalizes its inputs and is important to help convergence and generalization.</p>
    
   <p style="font-family: Calibri, serif; text-align: left;
    font-size: 24px; letter-spacing: .85px;"><b>• Dropout</b>: This layer randomly sets a fraction of input units to $0$ during training, which helps to prevent overfitting.</p>

   <p style="font-family: Calibri, serif; text-align: left;
    font-size: 24px; letter-spacing: .85px;"><b>• Flatten</b>: This layer transforms a multi-dimensional tensor into a one-dimensional tensor. It is used when transitioning from the <b>Feature Learning</b> segment — Convolutional and Pooling layers — to the fully-connected layers.</p>    
</div>

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">I plan to use different kernel sizes, both $3 \times 3$ and $5 \times 5$. This may allow the network to capture features at multiple scales.</p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">I am also gradually increasing the <i>dropout rates</i> as we advance through the process and the increase in the number of kernels.</p>

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">With that being said, let's go ahead and build our ConvNet.</p>

In [46]:
# Initiating model on GPU
with strategy.scope():
    model = Sequential()

    model.add(augmentation) # Adding data augmentation pipeline to the model

    # Feature Learning Layers
    model.add(Conv2D(32,                  # Number of filters/Kernels
                     (3,3),               # Size of kernels (3x3 matrix)
                     strides = 1,         # Step size for sliding the kernel across the input (1 pixel at a time).
                     padding = 'same',    # 'Same' ensures that the output feature map has the same dimensions as the input by padding zeros around the input.
                    input_shape = (256,256,3) # Input image shape
                    ))
    model.add(Activation('relu'))# Activation function
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size = (2,2), padding = 'same'))
    model.add(Dropout(0.2))

    model.add(Conv2D(64, (5,5), padding = 'same'))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size = (2,2), padding = 'same'))
    model.add(Dropout(0.2))

    model.add(Conv2D(128, (3,3), padding = 'same'))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size = (2,2), padding = 'same'))
    model.add(Dropout(0.3))

    model.add(Conv2D(256, (5,5), padding = 'same'))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size = (2,2), padding = 'same'))
    model.add(Dropout(0.3))

    model.add(Conv2D(512, (3,3), padding = 'same'))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size = (2,2), padding = 'same'))
    model.add(Dropout(0.3))

    # Flattening tensors
    model.add(Flatten())

    # Fully-Connected Layers
    model.add(Dense(2048))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    # Output Layer
    model.add(Dense(3, activation = 'softmax')) # Classification layer

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">By using Keras' <code>compile</code> method, we can prepare our neural network for training. This method has several parameters, the ones we will be focusing here are: </p>

<div style = "margin-left: 25px;">
    
  <p style="font-family: Calibri, serif; text-align: left;
    font-size: 24px; letter-spacing: .85px;"><b>• optimizer</b>: In this parameter, we define the algorithms to adjust the weight updates. This is an important parameter, because choosing the right optimizer is essential to speed convergence. We are going to use <code>RMSprop</code>, which is the best optimizer I've found during the tests I ran.</p>
    
   <p style="font-family: Calibri, serif; text-align: left;
    font-size: 24px; letter-spacing: .85px;"><b>• loss</b>: This is the loss function we're trying to minimize during training. In this case, we are using <code>categorical_crossentropy</code>, which is a good choice for classification tasks with over two classes.</p>

   <p style="font-family: Calibri, serif; text-align: left;
    font-size: 24px; letter-spacing: .85px;"><b>• metrics</b>: This parameter defines the metric that will be used to evaluate performance during training and validation. Since our data is not heavily unbalanced, we may use <code>accuracy</code> for this, which is a very straightforward metric given by the following formula: </p>
    <p style="font-family: Calibri, serif; text-align: left;font-size: 24px; letter-spacing: .85px;">
    \begin{equation}
    \text{Accuracy} = \frac{\text{Number of Correct Predictions}}{\text{Total Number of Predictions}}
    \end{equation}
    </p>    
</div>

In [47]:
# Compiling model
model.compile(optimizer = tf.keras.optimizers.RMSprop(0.0001), # 1e-4
              loss = 'categorical_crossentropy', # Ideal for multiclass tasks
              metrics = ['accuracy']) # Evaluation metric

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">After compiling the model, I am going to define an <b>Early Stopping</b> and a <b>Model Checkpoint</b>.</p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">Early Stopping serves the purpose of interrupting the training process when a certain metric stops improving over a period of time. In this case, I am going to configure the <code>EarlyStopping</code> method to monitor the accuracy in the test set, and stop the training process if we don't have any improvement on it after 5 epochs.</p>

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">Model Checkpoint will ensure that only the best weights get saved, and we're also going to define the <i>best weights</i> according to the accuracy of the model in the test set.</p>

In [48]:
# Defining an Early Stopping and Model Checkpoints
early_stopping = EarlyStopping(monitor = 'val_accuracy',
                              patience = 5, mode = 'max',
                              restore_best_weights = True)

checkpoint = ModelCheckpoint('best_model.keras',
                            monitor = 'val_accuracy',
                            save_best_only = True)

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">We may now use <code>model.fit()</code> to start the training and testing process.</p>  

In [49]:
# Training and Testing Model
try:
    history = model.fit(
        train, epochs = 50,
        validation_data = test,
        callbacks = [early_stopping, checkpoint])
except Exception as e:
    print("An error occurred:", e)

2024-09-27 14:51:28.266884: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.
2024-09-27 14:51:28.424876: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: CANCELLED: GetNextFromShard was cancelled
	 [[{{node MultiDeviceIteratorGetNextFromShard}}]]
	 [[RemoteCall]] [type.googleapis.com/tensorflow.DerivedStatus='']


An error occurred: Mixing different tf.distribute.Strategy objects: <tensorflow.python.distribute.one_device_strategy.OneDeviceStrategy object at 0x387205360> is not <tensorflow.python.distribute.distribute_lib._DefaultDistributionStrategy object at 0x3ead0bd90>


<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">The highest accuracy for the testing set has been reached at the 22<sup>nd</sup> epoch at 0.9600, or 96%, and didn't improve after that.</p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">With the <code>history</code> object, we can plot two lineplots showing both the loss function and accuracy for both sets over epochs.</p>

In [None]:
# Creating subplot
fig = make_subplots(rows=1,
                    cols=2,
                    subplot_titles=['<b>Loss Over Epochs</b>', '<b>Accuracy Over Epochs</b>'],
                    horizontal_spacing=0.2)

# Loss over epochs
train_loss = go.Scatter(x=list(range(len(history.history['loss']))),
                        y=history.history['loss'],
                        mode='lines',
                        line=dict(color='rgba(0, 67, 162, .75)', width=4.75),
                        name='Training',
                        showlegend = False)

val_loss = go.Scatter(x=list(range(len(history.history['val_loss']))),
                      y=history.history['val_loss'],
                      mode='lines',
                      line=dict(color='rgba(255, 132, 0, .75)', width=4.75),
                      name='Test',
                      showlegend = False)


fig.add_trace(train_loss, row=1, col=1)
fig.add_trace(val_loss, row=1, col=1)

# Accuray over epochs
train_acc = go.Scatter(x=list(range(len(history.history['accuracy']))),
                       y=history.history['accuracy'],
                       mode='lines',
                       line=dict(color='rgba(0, 67, 162, .75)', width=4.75),
                       name='Training',
                       showlegend = True)

val_acc = go.Scatter(x=list(range(len(history.history['val_accuracy']))),
                     y=history.history['val_accuracy'],
                     mode='lines',
                     line=dict(color='rgba(255, 132, 0, .75)', width=4.75),
                     name='Test',
                     showlegend = True)


fig.add_trace(train_acc, row=1, col=2)
fig.add_trace(val_acc, row=1, col=2)

# Updating layout
fig.update_layout(
    title={'text': '<b>Loss and Accuracy Over Epochs</b>', 'x': 0.025, 'xanchor': 'left'},
    margin=dict(t=100),
    plot_bgcolor=bg_color,paper_bgcolor=paper_color,
    height=500, width=1000,
    showlegend= True
)

fig.update_yaxes(title_text = 'Loss', row = 1, col = 1)
fig.update_yaxes(title_text = 'Accuracy', row = 1, col = 2)

fig.update_xaxes(title_text = 'Epoch', row = 1, col = 1)
fig.update_xaxes(title_text = 'Epoch', row = 1, col = 2)

# Showing figure
fig.show()

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">It is possible to see that the loss of the training set decreases continuously over epochs, whereas its accuracy increases. This happens because, at each epoch, the model starts to become more and more aware of the training set's patterns and particularities.</p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">For the test set, however, this process is a bit more slower. Overall, the lowest loss for the test set happened at epoch 14 at 0.5319, while the accuracy was at its peak at epoch 22, at 0.9600.</p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">Now that our model is built, trained, and tested, we can also plot its architecture, as well as summary to better understand it.</p>          

In [None]:
tf.keras.utils.plot_model(model) # Plotting model

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">In the image, it is possible to visualize the sequential process of the Convolutional Neural Network. First we have a 2D Convolutional Layer, with <i><b>ReLU</b></i> activation function, followed by a BatchNormalization Layer and then a MaxPooling 2D Layer. Finally, we have a Dropout Layer to avoid overfitting. This same pattern repeats a few times until we reach the Flatten Layer, which connects the output of the Feature Learning process to the Dense Layers for the final classification task.</p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">Using <code>model.summary()</code>, we can extract some extra info on the neural network.</p>         

In [None]:
model.summary() # Printing model summary

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">The summary displays the output shapes for each layer, as well as the number of parameters. We can clearly see, for instance, that the output shape for the first layer is <code>(None, 256,256,3)</code> where $256$ represents both height and width, while $3$ represents the RGB color. In the last dense layer, however, the output shape is <code>(None, 3)</code>, where $3$ represents the three classes for classification.</p>
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">We can also see that the model has over 69 million parameters, where 99.99% of them are trainable. The non-trainable parameters are the ones from the BatchNormalization layers.</p>     

<div id = 'val'
     style="font-family: Calibri, serif; text-align: left;">
    <hr style="border: none;
               border-top: 2.85px solid orange;
               width: 100%;
               margin-top: 62px;
               margin-bottom: auto;
               margin-left: 0;">
    <div style="font-size: 38px; letter-spacing: 2.25px;"><b>Validating Performance</b></div>
</div>

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">After finishing the training and testing phase, we may go ahead and validate our model on the validation set. To load the best weights achieved during training, we simply use the <code>load_weights</code> method. These weights will be saved with the same name we've given during the <code>ModelCheckpoint</code> configuration, when we set <code>ModelCheckpoint('best_model.h5')</code>.</p>     

In [None]:
# Loading best weights
model.load_weights('best_model.h5')

In [None]:
preds = model.predict(validation)  # Running model on the validation dataset
val_loss, val_acc = model.evaluate(validation) # Obtaining Loss and Accuracy on the val dataset

print('\nValidation Loss: ', val_loss)
print('\nValidation Accuracy: ', np.round(val_acc * 100), '%')

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">The output for <code>model.predict()</code> consists of probabilities for each class, while <code>model.evaluate()</code> returns loss and accuracy values.</p>  
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">It is clear that the model correctly predicts $97$% of the labels of the images in the validation set.</p>  
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">I am going to load some images from the validation test and run predictions on them individually, so we can see how the model performs according to each picture.</p>  

In [None]:
# Loading an image from the Validation/ Powdery directory
image_path = '/kaggle/input/plant-disease-recognition-dataset/Validation/Validation/Powdery/9b6a318cc5721d73.jpg'
original_image = Image.open(image_path)
og_width, og_height = original_image.size

# Resizing image for optimal performance
new_width = int(og_width * .20) # 20% of the original size
new_height = int(og_height * .20) # 20% of the original size

resized_img = original_image.resize((new_width, new_height))
print('Picture of a Powdery Plant: \n')
resized_img

In [None]:
# Manually preprocessing image
preprocessed_image = original_image.resize((256, 256))
preprocessed_image = np.array(preprocessed_image) / 255.0

preds = model.predict(np.expand_dims(preprocessed_image, axis = 0))
labels = ['Healthy', 'Powdery', 'Rust']

preds_class = np.argmax(preds)
preds_label = labels[preds_class]

print(f'\nPredicted Class: {preds_label}')
print(f'\nConfidence Score: {preds[0][preds_class]}')

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">The model is about 99.9% confident that the plant in the picture belongs to the <i><b>Powdery</b></i> class, which is correct.</p>  

In [None]:
# Loading an image from the Validation/ Rust directory
image_path = '/kaggle/input/plant-disease-recognition-dataset/Validation/Validation/Rust/8152cfbd5a28b5d2.jpg'
original_image = Image.open(image_path)
og_width, og_height = original_image.size

# Resizing image for optimal performance
new_width = int(og_width * .20) # 20% of the original size
new_height = int(og_height * .20) # 20% of the original size

resized_img = original_image.resize((new_width, new_height))
print('Picture of a Rust Plant: \n')
resized_img

In [None]:
# Manually preprocessing image
preprocessed_image = original_image.resize((256, 256))
preprocessed_image = np.array(preprocessed_image) / 255.0

preds = model.predict(np.expand_dims(preprocessed_image, axis = 0))
labels = ['Healthy', 'Powdery', 'Rust']

preds_class = np.argmax(preds)
preds_label = labels[preds_class]

print(f'\nPredicted Class: {preds_label}')
print(f'\nConfidence Score: {preds[0][preds_class]}')

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">The model is 100% certain that the plant in the picture belongs to the <i><b>Rust</b></i> class, which is also correct.</p>  

In [None]:
# Loading an image from the Validation/ Healthy directory
image_path = '/kaggle/input/plant-disease-recognition-dataset/Validation/Validation/Healthy/9c99786a63786571.jpg'
original_image = Image.open(image_path)
og_width, og_height = original_image.size

# Resizing image for optimal performance
new_width = int(og_width * .20) # 20% of the original size
new_height = int(og_height * .20) # 20% of the original size

resized_img = original_image.resize((new_width, new_height))
print('Picture of a Healthy Plant: \n')
resized_img

In [None]:
# Manually preprocessing image
preprocessed_image = original_image.resize((256, 256))
preprocessed_image = np.array(preprocessed_image) / 255.0

preds = model.predict(np.expand_dims(preprocessed_image, axis = 0))
labels = ['Healthy', 'Powdery', 'Rust']

preds_class = np.argmax(preds)
preds_label = labels[preds_class]

print(f'\nPredicted Class: {preds_label}')
print(f'\nConfidence Score: {preds[0][preds_class]}')

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">The model is 100% certain that the plant in the picture belongs to the <i><b>Healthy</b></i> class, which is also correct.</p>  

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">After running several tests with other pictures, I could identify that the current model is performing fairly well in classifying all the three classes.</p>  
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">To save the current weights, so you can deploy this model or continue working with it later on, you can simply use Keras' <code>.save()</code> method. This is going to save your model as an HDF5 file.</p>  

In [None]:
model.save('plant_disease_classifier.h5') # Saving model

<div id = 'conclusion'
     style="font-family: Calibri, serif; text-align: left;">
    <hr style="border: none;
               border-top: 2.85px solid orange;
               width: 100%;
               margin-top: 62px;
               margin-bottom: auto;
               margin-left: 0;">
    <div style="font-size: 38px; letter-spacing: 2.25px;"><b>Conclusion</b></div>
</div>

<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">In this notebook, we explored the basics of Convolutional Neural Networks. We delved deeper into the main layers — Convolutional, Pooling, etc. —, activation functions, as well as many other techniques to work with image data and CNNs for image classification.</p>  
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">Even though many tasks nowadays can be efficiently done with pre-trained models, that can be easily accessible via platforms such as TensorFlow Hub and HuggingFace, it is still essential to understand what is the role of each layer inside a Convolutional Neural Network and how they interact with each other. This is why this notebook have the intention of guiding you through the process of building a CNN from scratch, and I plan to bring more notebooks such as this one for other <i>Deep Learning</i> tasks and architectures. </p>  
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">Our model scored 97.0% in accuracy while predicting labels for the validation dataset, which is a great performance, and it was competent to identify relevant patterns across all the classes in the dataset.</p>  
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">I hope that this notebook serves as an introduction to those that are still just starting to explore ConvNets, or even help veterans to refine their knowledge on some of the basics. Please, feel free to copy this notebook and edit it as you wish, specially to try your own improvements for higher performance and testings.</p>  
          
<p style="font-family: Calibri, serif; text-align: left;
          font-size: 24px; letter-spacing: .85px;">Thank you so much for reading. Your feedback, upvotes, and suggestions are always much welcome!</p>  

<hr style="border: 0;
           height: 1px;
           border-top: 0.85px;
           solid #b2b2b2">
           
<div style="text-align: left;
            color: #8d8d8d;
            padding-left: 15px;
            font-size: 14.25px;">
    Luis Fernando Torres, 2023 <br><br>
    Let's connect!🔗<br>
    <a href="https://www.linkedin.com/in/luuisotorres/">LinkedIn</a> • <a href="https://medium.com/@luuisotorres">Medium</a> • <a href = "https://huggingface.co/luisotorres">Hugging Face</a><br><br>
</div>