# Import Libraries

In [1]:
import os
import random
import shutil
import opendatasets as od

# Download Dataset

In [2]:
# Fetch the concrete-crack image dataset from Kaggle into the current working directory
dataset_url = "https://www.kaggle.com/datasets/ahmadhabibhasanzein/concrete-crack-images-for-classification"
od.download(dataset_url)

Downloading concrete-crack-images-for-classification.zip to ./concrete-crack-images-for-classification


100%|██████████| 234M/234M [00:07<00:00, 32.7MB/s] 





# Folder Path

In [3]:
# Source folders inside the downloaded dataset, one per class.
# Each variable must point at the folder of its own class: the images are later
# copied from `<class>_source_folder` into the `<class>_train/test_folder`, so a
# mismatch here silently inverts the labels of the generated dataset.
positive_source_folder = 'concrete-crack-images-for-classification/Populasi/Positive'
negative_source_folder = 'concrete-crack-images-for-classification/Populasi/Negative'

# Destination folders of the train/test split, one per class
positive_train_folder = 'Dataset/Train/Positive'
negative_train_folder = 'Dataset/Train/Negative'
positive_test_folder = 'Dataset/Test/Positive'
negative_test_folder = 'Dataset/Test/Negative'

In [4]:
# Create the train/test output folders for both classes (no error if they already exist)
for split_folder in (
    positive_train_folder,
    negative_train_folder,
    positive_test_folder,
    negative_test_folder,
):
    os.makedirs(split_folder, exist_ok=True)

# Splitting

In [5]:
# Fraction of each class assigned to the training split; the remainder goes to testing
train_positive_percentage = train_negative_percentage = 0.8

In [6]:
# Collect the image file names of each class.
# os.listdir() returns entries in arbitrary order, so each listing is sorted and
# then shuffled with a fixed seed: the train/test split derived from these lists
# becomes a reproducible random sample instead of depending on file-system order.
IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg', '.gif')
SPLIT_SEED = 42


def list_shuffled_images(folder, seed=SPLIT_SEED):
    """Return the image file names found in `folder`, shuffled reproducibly.

    Args:
        folder: Directory whose entries are listed.
        seed: Seed of the private random generator used for shuffling.

    Returns:
        A list of file names (not full paths) whose extension is one of
        IMAGE_EXTENSIONS. The extension check is case-insensitive, so a file
        such as `IMG.JPG` is kept.
    """
    names = sorted(
        name for name in os.listdir(folder)
        if name.lower().endswith(IMAGE_EXTENSIONS)
    )
    # A dedicated Random instance keeps the global random state untouched
    random.Random(seed).shuffle(names)
    return names


image_positive_files = list_shuffled_images(positive_source_folder)
image_negative_files = list_shuffled_images(negative_source_folder)

In [7]:
# Training-set size per class: the class total scaled by its training fraction, truncated to an int
num_positive_train_images = int(train_positive_percentage * len(image_positive_files))
num_negative_train_images = int(train_negative_percentage * len(image_negative_files))

In [8]:
# Report the class totals and the planned training-set sizes
for label, count in (
    ('Number of positive images:', len(image_positive_files)),
    ('Number of negative images:', len(image_negative_files)),
    ('Number of positive training images:', num_positive_train_images),
    ('Number of negative training images:', num_negative_train_images),
):
    print(label, count)

Number of positive images: 20000
Number of negative images: 20000
Number of positive training images: 16000
Number of negative training images: 16000


In [9]:
def copy_images(file_names, source_folder, destination_folder):
    """Copy every file named in `file_names` from one folder to another.

    Args:
        file_names: File names (not full paths) to copy.
        source_folder: Folder that currently contains the files.
        destination_folder: Folder that receives the copies.

    Files are copied with shutil.copyfile, so a file of the same name that
    already exists in the destination is replaced.
    """
    for file_name in file_names:
        shutil.copyfile(
            os.path.join(source_folder, file_name),
            os.path.join(destination_folder, file_name),
        )


# The first num_*_train_images files of each class form the training set
copy_images(image_positive_files[:num_positive_train_images], positive_source_folder, positive_train_folder)
copy_images(image_negative_files[:num_negative_train_images], negative_source_folder, negative_train_folder)

# The remaining files of each class form the testing set
copy_images(image_positive_files[num_positive_train_images:], positive_source_folder, positive_test_folder)
copy_images(image_negative_files[num_negative_train_images:], negative_source_folder, negative_test_folder)

In [10]:
# Verify the split by counting the files that actually landed in each output folder
for label, folder in (
    ('Number of positive training images:', positive_train_folder),
    ('Number of negative training images:', negative_train_folder),
    ('Number of positive testing images:', positive_test_folder),
    ('Number of negative testing images:', negative_test_folder),
):
    print(label, len(os.listdir(folder)))

Number of positive training images: 16000
Number of negative training images: 16000
Number of positive testing images: 4000
Number of negative testing images: 4000
