In [1]:
import os
import numpy as np
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
#@markdown <a href="https://github.com/yanuarcy/Fruiteasy/blob/ML/Assets/dataset-and-pretrain.txt">Our sources</a>

# download our latest raw dataset (CombinedV6)
DATASET_URL = '1KaIHEjQgJUgRcU6YfF_wevzsw2VYvTUI' #@param {type:"string"}
!gdown --id {DATASET_URL}

Downloading...
From (original): https://drive.google.com/uc?id=1KaIHEjQgJUgRcU6YfF_wevzsw2VYvTUI
From (redirected): https://drive.google.com/uc?id=1KaIHEjQgJUgRcU6YfF_wevzsw2VYvTUI&confirm=t&uuid=cc9fd413-f1a8-4056-9ba3-9a4381dc4696
To: /content/CombinedV6.zip
100% 296M/296M [00:04<00:00, 61.2MB/s]


In [4]:
import zipfile

#@markdown Copy the zip dataset path from the files

# Directory the archive contents are unpacked into
destination_dir='./'
# Location of the downloaded zip archive
zip_dir='/content/CombinedV6.zip' #@param {type:"string"}

# Unpack every member of the archive into the destination directory
with zipfile.ZipFile(zip_dir) as archive:
    archive.extractall(path=destination_dir)

In [5]:
# Dataset locations and preprocessing constants
parent_262_directory = './CombinedV6/262'       # parent directory of the "262" dataset
parent_360_directory = './CombinedV6/360'       # parent directory of the "360" dataset
tempor_folder = './Complete-ready-to-useV4/'    # parent directory of the cleaned output dataset
IMAGE_SIZE = 224                                # images are resized to IMAGE_SIZE x IMAGE_SIZE

# os.listdir() returns entries in arbitrary order, and that order may differ
# between the two parent directories. Both listings are therefore sorted so
# that `fruit_name`, `path_262` and `path_360` stay index-aligned when the
# fruit folders have the same names in both datasets.

# Fruit names present in the dataset (taken from the 262 sub-directories)
fruit_name = sorted(
    entry for entry in os.listdir(parent_262_directory)
    if os.path.isdir(os.path.join(parent_262_directory, entry))
)
# Per-fruit directory paths of the 262 dataset, in the same order as fruit_name
path_262 = [os.path.join(parent_262_directory, name) for name in fruit_name]

# Per-fruit directory paths of the 360 dataset
names_360 = sorted(
    entry for entry in os.listdir(parent_360_directory)
    if os.path.isdir(os.path.join(parent_360_directory, entry))
)
path_360 = [os.path.join(parent_360_directory, name) for name in names_360]

# Surface a mismatch early: pairing the lists by position is only valid when
# both datasets contain exactly the same fruit folders.
if names_360 != fruit_name:
    mismatched = sorted(set(fruit_name) ^ set(names_360))
    print(f'Warning: fruit folders differ between 262 and 360 datasets: {mismatched}')

# Augmentation generator used by augment_images().
# Not enabled: width_shift_range=0.2, height_shift_range=0.2, shear_range=0.2
augmentation_options = dict(
    # geometric transforms
    rotation_range=90,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
    # colour transforms
    brightness_range=[0.8, 1.2],   # reasonable brightness range
    channel_shift_range=20.0,      # moderate channel shift
)
train_datagen = ImageDataGenerator(**augmentation_options)

In [6]:
# Fungsi untuk load images dari folder per buah di dataset
def load_images_from_folder(folder):
    """Load every JPEG/PNG image in ``folder`` as an RGB array of fixed size.

    Files are selected by extension (``.jpg``, ``.jpeg``, ``.png``, matched
    case-insensitively) and processed in sorted filename order. Each image is
    converted to RGB and resized to ``IMAGE_SIZE`` x ``IMAGE_SIZE``. Files that
    fail to load are reported with a printed message and skipped.

    Args:
        folder: Path of the directory to read images from.

    Returns:
        A NumPy array built from the list of loaded image arrays; empty when
        no image could be loaded.
    """
    images = []
    # Sorted so that image order (and therefore the numbering of saved files)
    # is reproducible; os.listdir() order is arbitrary.
    for filename in sorted(os.listdir(folder)):
        # Case-insensitive match so files such as 'IMG_001.JPG' are not skipped.
        if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue
        img_path = os.path.join(folder, filename)
        try:
            # The context manager closes the underlying file handle promptly.
            with Image.open(img_path) as raw:
                # Convert to RGB *before* resizing: Pillow falls back to
                # nearest-neighbour resampling for palette/bilevel images, so
                # resizing first would degrade those files.
                rgb = raw.convert('RGB').resize((IMAGE_SIZE, IMAGE_SIZE))
                images.append(np.array(rgb))
        except Exception as e:
            # Best effort: report the broken file and continue with the rest.
            print(f'Error loading image {img_path}: {e}')
    return np.array(images)

def augment_images(images, n):
    """Generate ``n`` randomly augmented variants of every image.

    Each image is pushed through ``train_datagen.flow`` as a batch of one and
    exactly ``n`` batches are drawn from it. Images whose shape is not
    ``(IMAGE_SIZE, IMAGE_SIZE, 3)`` are skipped, and an image that raises
    during processing is reported and skipped.

    Args:
        images: Iterable of image arrays (e.g. the result of
            ``load_images_from_folder``).
        n: Number of augmented variants per image. Values ``<= 0`` produce no
            variants.

    Returns:
        A NumPy array of all augmented images, in input order.
    """
    augmented_images = []
    total = len(images)
    for idx, image in enumerate(images):
        try:
            print(f'Processing image {idx+1}/{total}')
            if image.shape != (IMAGE_SIZE, IMAGE_SIZE, 3):
                print(f'Skipping image {idx+1} due to incorrect shape: {image.shape}')
                continue

            # flow() works on batches, so present the image as a batch of one.
            single_batch = image.reshape((1, IMAGE_SIZE, IMAGE_SIZE, 3))
            batches = iter(train_datagen.flow(single_batch, batch_size=1))

            # The generator is endless; pull exactly n batches. Bounding the
            # loop with range(n) also guarantees that n <= 0 yields nothing
            # (an append-then-check counter would emit one image regardless).
            produced = 0
            for _ in range(n):
                augmented_images.append(next(batches)[0])
                produced += 1
            print(f'Image {idx+1} augmented {produced} times')
        except Exception as e:
            # Best effort: report the failure and continue with the next image.
            print(f'Error processing image {idx+1}: {e}')

    return np.array(augmented_images)

def save_images_to_local(output_folder, images, augment=False):
    """Write image arrays to ``output_folder`` as numbered JPEG files.

    Files are named ``augmented_image_<i>.jpg`` when ``augment`` is true and
    ``formatted_image_<i>.jpg`` otherwise, where ``<i>`` is the position of the
    image in ``images``. Existing files with the same name are overwritten.

    Args:
        output_folder: Destination directory; created (with parents) if missing.
        images: Iterable of image arrays.
        augment: Selects the filename prefix (see above).
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    prefix = 'augmented_image' if augment else 'formatted_image'
    for i, image in enumerate(images):
        # Clamp to the valid 8-bit range first: casting an out-of-range float
        # straight to uint8 wraps around instead of saturating.
        pixels = np.clip(image, 0, 255).astype(np.uint8)
        img_path = os.path.join(output_folder, f'{prefix}_{i}.jpg')
        Image.fromarray(pixels).save(img_path)

In [7]:
# Number of augmented variants generated for each source image of the 262 set
AUGMENTATIONS_PER_IMAGE = 10

# Augment the 262 images.
# The fruit label is taken from each folder's own name rather than by pairing
# the path list with `fruit_name` by position: the directory listings are not
# guaranteed to share an order, so positional pairing can file images under
# the wrong fruit.
for source_dir in path_262:
    fruit = os.path.basename(os.path.normpath(source_dir))
    images = load_images_from_folder(source_dir)
    augmented_images = augment_images(images, n=AUGMENTATIONS_PER_IMAGE)

    output_folder = os.path.join(tempor_folder, fruit)
    save_images_to_local(output_folder, augmented_images, augment=True)

# Format (RGB + resize only, no augmentation) the 360 images
for source_dir in path_360:
    fruit = os.path.basename(os.path.normpath(source_dir))
    images = load_images_from_folder(source_dir)
    output_folder = os.path.join(tempor_folder, fruit)
    print(output_folder)
    save_images_to_local(output_folder, images, augment=False)

Processing image 1/32
Image 1 augmented 10 times
Processing image 2/32
Image 2 augmented 10 times
Processing image 3/32
Image 3 augmented 10 times
Processing image 4/32
Image 4 augmented 10 times
Processing image 5/32
Image 5 augmented 10 times
Processing image 6/32
Image 6 augmented 10 times
Processing image 7/32
Image 7 augmented 10 times
Processing image 8/32
Image 8 augmented 10 times
Processing image 9/32
Image 9 augmented 10 times
Processing image 10/32
Image 10 augmented 10 times
Processing image 11/32
Image 11 augmented 10 times
Processing image 12/32
Image 12 augmented 10 times
Processing image 13/32
Image 13 augmented 10 times
Processing image 14/32
Image 14 augmented 10 times
Processing image 15/32
Image 15 augmented 10 times
Processing image 16/32
Image 16 augmented 10 times
Processing image 17/32
Image 17 augmented 10 times
Processing image 18/32
Image 18 augmented 10 times
Processing image 19/32
Image 19 augmented 10 times
Processing image 20/32
Image 20 augmented 10 time

In [8]:
#zip the folder then download it!
!zip -r Complete-ready-to-useV4.zip Complete-ready-to-useV4

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  adding: Complete-ready-to-useV4/Alpukat/augmented_image_166.jpg (deflated 4%)
  adding: Complete-ready-to-useV4/Alpukat/formatted_image_291.jpg (deflated 5%)
  adding: Complete-ready-to-useV4/Alpukat/augmented_image_173.jpg (deflated 3%)
  adding: Complete-ready-to-useV4/Alpukat/augmented_image_320.jpg (deflated 5%)
  adding: Complete-ready-to-useV4/Alpukat/formatted_image_169.jpg (deflated 3%)
  adding: Complete-ready-to-useV4/Alpukat/formatted_image_263.jpg (deflated 6%)
  adding: Complete-ready-to-useV4/Alpukat/formatted_image_163.jpg (deflated 3%)
  adding: Complete-ready-to-useV4/Alpukat/formatted_image_86.jpg (deflated 6%)
  adding: Complete-ready-to-useV4/Alpukat/formatted_image_373.jpg (deflated 6%)
  adding: Complete-ready-to-useV4/Alpukat/formatted_image_289.jpg (deflated 6%)
  adding: Complete-ready-to-useV4/Alpukat/formatted_image_194.jpg (deflated 6%)
  adding: Complete-ready-to-useV4/Alpukat/augmented_imag