In [2]:
import os
import random
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.densenet import DenseNet201
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split

caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']


In [3]:
# Set seed for reproducibility
seed = 42
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

In [4]:
# Set the path to your dataset directory
dataset_dir = '/kaggle/input/identifying-disease-in-tea-leafs/tea sickness dataset'

In [5]:
# Set the desired image size
image_size = (224, 224)

# Set the batch size
batch_size = 32

# Split the dataset into training, validation, and testing sets
train_split = 0.7
valid_split = 0.15
test_split = 0.15

In [6]:
# Define the class names
class_names = ['Anthracnose', 'algal leaf', 'bird eye spot', 'brown blight', 'gray light', 'healthy', 'red leaf spot', 'white spot']
num_classes = len(class_names)

In [7]:
# Create a list to store the file paths and corresponding labels
file_paths = []
labels = []

# Iterate over the class names and collect the file paths and labels
for class_index, class_name in enumerate(class_names):
    class_dir = os.path.join(dataset_dir, class_name)
    file_names = os.listdir(class_dir)
    file_paths.extend([os.path.join(class_dir, file_name) for file_name in file_names])
    labels.extend([class_index] * len(file_names))

In [8]:
# Split the file paths and labels into training and testing sets
train_paths, test_paths, train_labels, test_labels = train_test_split(
    file_paths, labels, test_size=test_split, random_state=seed, stratify=labels
)

# Split the training set into training and validation sets
train_paths, valid_paths, train_labels, valid_labels = train_test_split(
    train_paths, train_labels, test_size=valid_split/(train_split+valid_split), random_state=seed, stratify=train_labels
)

In [9]:
# Convert labels to string class labels
train_labels = [class_names[label] for label in train_labels]
valid_labels = [class_names[label] for label in valid_labels]
test_labels = [class_names[label] for label in test_labels]

In [10]:
# Create the data generator with data augmentation for training set
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255.0,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

In [11]:
# Load the training dataset from the file paths and labels
train_generator = train_datagen.flow_from_dataframe(
    pd.DataFrame({'paths': train_paths, 'labels': train_labels}),
    x_col='paths',
    y_col='labels',
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
    classes=class_names,
    shuffle=True,
    seed=seed
)

Found 619 validated image filenames belonging to 8 classes.


In [12]:
# Create the data generator without data augmentation for validation and testing sets
valid_datagen = ImageDataGenerator(rescale=1.0 / 255.0)
test_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

In [13]:
# Load the validation dataset from the file paths and labels
valid_generator = valid_datagen.flow_from_dataframe(
    pd.DataFrame({'paths': valid_paths, 'labels': valid_labels}),
    x_col='paths',
    y_col='labels',
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
    classes=class_names,
    shuffle=False,
    seed=seed
)

Found 133 validated image filenames belonging to 8 classes.


In [14]:
# Load the testing dataset from the file paths and labels
test_generator = test_datagen.flow_from_dataframe(
    pd.DataFrame({'paths': test_paths, 'labels': test_labels}),
    x_col='paths',
    y_col='labels',
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
    classes=class_names,
    shuffle=False,
    seed=seed
)

Found 133 validated image filenames belonging to 8 classes.


In [15]:
# Load the pre-trained DenseNet model without the top (fully connected) layers
base_model = DenseNet201(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5


In [16]:
# Add a global average pooling layer and additional dense layers for classification
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)  # Add dropout regularization
x = Dense(256, activation='relu')(x)
predictions = Dense(num_classes, activation='softmax')(x)

In [17]:
# Create the model
model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the base model layers
for layer in base_model.layers:
    layer.trainable = False
    
# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [18]:
# Train the model
epochs = 50
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // batch_size,
    validation_data=valid_generator,
    validation_steps=valid_generator.samples // batch_size,
    epochs=epochs
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [19]:
model.save('/kaggle/working/densenet_model.h5')

In [20]:
# Evaluate the model on the testing set
test_loss, test_accuracy = model.evaluate(test_generator, steps=test_generator.samples // batch_size)

# Print the test accuracy and loss
print('Test Accuracy:', test_accuracy)
print('Test Loss:', test_loss)

Test Accuracy: 0.8359375
Test Loss: 0.4254370629787445


In [21]:
# Evaluate the model on the testing set
train_loss, train_accuracy = model.evaluate(train_generator, steps=train_generator.samples // batch_size)

# Print the test accuracy and loss
print('Train Accuracy:', train_accuracy)
print('Train Loss:', train_loss)

Train Accuracy: 0.9621710777282715
Train Loss: 0.11288204044103622


In [22]:
# Evaluate the model on the testing set
valid_loss, valid_accuracy = model.evaluate(valid_generator, steps=valid_generator.samples // batch_size)

# Print the test accuracy and loss
print('Valid Accuracy:', valid_accuracy)
print('Valid Loss:', valid_loss)

Valid Accuracy: 0.8671875
Valid Loss: 0.43315213918685913
