In [None]:
# Mount Google Drive at /content/drive; the dataset paths used below live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import imgaug.augmenters as iaa
from PIL import Image
import os
from tqdm import tqdm
import numpy as np

# Set the path to your dataset
dataset_path = '/content/drive/MyDrive/Colab/dataset-augmented2'

# Create an augmentation sequence
augmentation_seq = iaa.Sequential([
    iaa.Fliplr(0.2),  # horizontally flip 20% of the images
    iaa.Flipud(0.4),  # vertically flip 40% of the images
    iaa.GaussianBlur(sigma=(0, 1.0)),  # apply gaussian blur with a sigma between 0 and 1.0
    iaa.AdditiveGaussianNoise(scale=(0, 0.1 * 255)),  # add gaussian noise
    iaa.ElasticTransformation(alpha=(0.5, 3.5), sigma=0.25),  # apply elastic transformations
    iaa.Sometimes(0.5, iaa.GammaContrast((0.5, 2.0))),  # adjust gamma contrast with a 50% chance
])

# Set the desired number of images for each class (balance)
desired_num_images_per_class = 40

# Iterate over each class in your dataset
for class_folder in tqdm(os.listdir(dataset_path)):
    class_path = os.path.join(dataset_path, class_folder)

    # Skip stray files in the dataset root; only directories are class folders
    if not os.path.isdir(class_path):
        continue

    # Snapshot the folder once: the files present now are the augmentation sources.
    # Listing the directory again on every iteration is slow on a Drive mount.
    image_names = sorted(os.listdir(class_path))

    # Get the current number of images in the class
    num_images_in_class = len(image_names)

    # Calculate the number of augmentations needed to reach the desired balance
    augmentations_needed = max(0, desired_num_images_per_class - num_images_in_class)

    print(num_images_in_class, augmentations_needed)

    # An empty class folder has nothing to augment from
    if num_images_in_class == 0:
        continue

    # Names already used in this folder, so a new file never overwrites an existing one
    taken_names = set(image_names)

    # Produce exactly the missing number of images, cycling through the sources so
    # the target is reached even when more augmentations are needed than sources exist
    for aug_index in range(augmentations_needed):
        image_name = image_names[aug_index % num_images_in_class]
        image_path = os.path.join(class_path, image_name)

        # Load the image using PIL and convert it to a numpy array;
        # the context manager closes the file handle once the pixels are copied
        with Image.open(image_path) as img:
            img_array = np.array(img)

        # Apply augmentation
        augmented_img_array = augmentation_seq.augment_image(img_array)

        # Pick a unique file name (the extension of the source is preserved so PIL
        # infers the same format); bump the counter if the name is already taken
        suffix = aug_index
        augmented_name = f"aug_{suffix}_{image_name}"
        while augmented_name in taken_names:
            suffix += 1
            augmented_name = f"aug_{suffix}_{image_name}"
        taken_names.add(augmented_name)

        # Save the augmented image in the same folder
        augmented_image_path = os.path.join(class_path, augmented_name)
        Image.fromarray(augmented_img_array).save(augmented_image_path)

# Note: Classes that already hold at least the desired number of images are left untouched.


  0%|          | 0/30 [00:00<?, ?it/s]

32 8


  3%|▎         | 1/30 [00:16<08:10, 16.92s/it]

21 19


  7%|▋         | 2/30 [01:02<15:39, 33.57s/it]

23 17


 10%|█         | 3/30 [01:13<10:36, 23.59s/it]

22 18


 13%|█▎        | 4/30 [01:26<08:19, 19.20s/it]

29 11


 17%|█▋        | 5/30 [01:36<06:41, 16.06s/it]

32 8


 20%|██        | 6/30 [01:47<05:44, 14.37s/it]

31 9


 23%|██▎       | 7/30 [02:04<05:46, 15.05s/it]

30 10


 27%|██▋       | 8/30 [02:14<04:56, 13.46s/it]

25 15


 30%|███       | 9/30 [02:51<07:19, 20.94s/it]

31 9


 33%|███▎      | 10/30 [03:09<06:36, 19.85s/it]

28 12


 37%|███▋      | 11/30 [03:21<05:33, 17.54s/it]

39 1


 40%|████      | 12/30 [03:34<04:49, 16.11s/it]

23 17


 43%|████▎     | 13/30 [04:07<06:00, 21.19s/it]

28 12


 47%|████▋     | 14/30 [04:26<05:29, 20.60s/it]

30 10


 50%|█████     | 15/30 [04:39<04:36, 18.46s/it]

30 10


 53%|█████▎    | 16/30 [04:53<03:57, 16.97s/it]

23 17


 57%|█████▋    | 17/30 [05:15<03:58, 18.37s/it]

23 17


 60%|██████    | 18/30 [05:26<03:16, 16.39s/it]

31 9


 63%|██████▎   | 19/30 [05:39<02:46, 15.13s/it]

25 15


 67%|██████▋   | 20/30 [06:00<02:50, 17.07s/it]

24 16


 70%|███████   | 21/30 [06:33<03:15, 21.72s/it]

25 15


 73%|███████▎  | 22/30 [06:46<02:33, 19.24s/it]

26 14


 77%|███████▋  | 23/30 [07:06<02:14, 19.28s/it]

25 15


 80%|████████  | 24/30 [07:29<02:03, 20.66s/it]

30 10


 83%|████████▎ | 25/30 [07:42<01:31, 18.34s/it]

29 11


 87%|████████▋ | 26/30 [07:56<01:07, 16.84s/it]

31 9


 90%|█████████ | 27/30 [08:12<00:50, 16.81s/it]

27 13


 93%|█████████▎| 28/30 [08:46<00:43, 21.86s/it]

23 17


 97%|█████████▋| 29/30 [08:56<00:18, 18.17s/it]

33 7


100%|██████████| 30/30 [09:06<00:00, 18.23s/it]


In [None]:
import os
import pandas as pd
import numpy as np
import random
import shutil
from shutil import copyfile
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.offsetbox import (TextArea, DrawingArea, OffsetImage,
                                  AnnotationBbox)
import matplotlib.patches as mpatches

# Set the path to your dataset
dataset_path = '/content/drive/MyDrive/Colab/dataset-augmented2'
# NOTE(review): the working-directory change is kept in case other cells rely on it;
# the counting loop below no longer depends on it because it uses absolute paths.
os.chdir(dataset_path)

# to list every directory name (label name); stray non-directory entries are skipped
directories_list = [
    entry for entry in tf.io.gfile.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, entry))
]

# get number of labels
len_labels = len(directories_list)
print(f"Total Class Labels = {len_labels}")

length_file_list = []; label_list = []

for item in directories_list:
    # get each label directory (absolute path, independent of the current working directory)
    item_dir = os.path.join(dataset_path, item)
    # get list of images of each label (listed once and reused for the count)
    item_files = os.listdir(item_dir)
    # number of images per label
    len_per_label = len(item_files)

    length_file_list.append(len_per_label)
    label_list.append(item)

# One row per label, largest classes first; the bare last expression renders the table
df_temp = pd.DataFrame({'Labels':label_list, 'Number of Images':length_file_list}).\
sort_values(by='Number of Images', ascending=False)
df_temp

Total Class Labels = 30


Unnamed: 0,Labels,Number of Images
0,cotton,40
1,almond,40
28,tea,40
27,vigna-radiati(Mung),40
26,wheat,40
25,rice,40
24,soyabean,40
23,sugarcane,40
22,tomato,40
21,pineapple,40
