### Train Test Split

In [2]:
import os
import random
import shutil

# Define the directory containing the class folders
base_dir = r"C:\Users\Ven\Desktop\Leaflet Data 2023\data exp"
output_dir = r"C:\Users\Ven\Desktop\Leaflet-CNN-Sequential\model\data\Dataset exp"

# File extensions treated as images. They are compared against the
# lower-cased file name, so ".JPG", ".Png", ".JPEG", ... all match.
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".gif")


def list_image_files(directory):
    """Return the sorted names of the image files directly inside *directory*.

    A name qualifies when it is a regular file and its lower-cased form ends
    with one of ``IMAGE_EXTENSIONS``. Sorting makes the result independent of
    the order in which the operating system lists the directory, which is
    required for a seeded shuffle to be reproducible.
    """
    return sorted(
        name
        for name in os.listdir(directory)
        if name.lower().endswith(IMAGE_EXTENSIONS)
        and os.path.isfile(os.path.join(directory, name))
    )


def split_dataset(base_dir, output_dir, train_ratio=0.8, seed=None):
    """Copy every class folder of *base_dir* into a train/valid layout.

    Each sub-directory of *base_dir* is treated as one class. Its images are
    shuffled, the first ``int(total * train_ratio)`` are copied to
    ``output_dir/train/<class>`` and the rest to ``output_dir/valid/<class>``.
    One summary line is printed per class.

    NOTE: existing output folders are reused and never emptied, so running the
    split twice with a different shuffle can leave the same image in both
    ``train`` and ``valid``. Use a fresh *output_dir* (or a fixed *seed*) when
    re-running.

    Args:
        base_dir: Directory whose sub-directories are the class folders.
        output_dir: Destination root; created if missing.
        train_ratio: Fraction of each class that goes to the training set.
        seed: Optional seed for a reproducible shuffle. ``None`` keeps the
            original behaviour of shuffling with the global ``random`` state.

    Returns:
        A dict mapping each class name to ``(num_train, num_valid)``.

    Raises:
        ValueError: If *train_ratio* is outside ``[0, 1]``.
    """
    if not 0 <= train_ratio <= 1:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio!r}")

    # A private generator is only used when a seed is requested; otherwise the
    # module-level generator is used, exactly as the original script did.
    rng = random if seed is None else random.Random(seed)

    # Get the list of subdirectories (class names)
    class_names = sorted(
        d for d in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, d))
    )

    # Create necessary directories
    os.makedirs(output_dir, exist_ok=True)

    counts = {}
    for class_name in class_names:
        og_data_dir = os.path.join(base_dir, class_name)

        # Output directories for train and validation data
        train_dir = os.path.join(output_dir, "train", class_name)
        valid_dir = os.path.join(output_dir, "valid", class_name)
        os.makedirs(train_dir, exist_ok=True)
        os.makedirs(valid_dir, exist_ok=True)

        image_files = list_image_files(og_data_dir)
        rng.shuffle(image_files)

        total_images = len(image_files)
        num_train = int(total_images * train_ratio)
        num_valid = total_images - num_train

        # The shuffled list is cut once; each half goes to its own folder.
        destinations = (
            (train_dir, image_files[:num_train]),
            (valid_dir, image_files[num_train:]),
        )
        for target_dir, names in destinations:
            for name in names:
                shutil.copy(os.path.join(og_data_dir, name), os.path.join(target_dir, name))

        print(f"Split {total_images} images for {class_name} into {num_train} for training and {num_valid} for validation.")
        counts[class_name] = (num_train, num_valid)

    return counts


# In a notebook cell (or when run as a script) __name__ is "__main__", so the
# split still runs; importing this code elsewhere no longer touches the disk.
if __name__ == "__main__":
    split_dataset(base_dir, output_dir)


Split 74 images for Balayong (C NBG) into 59 for training and 15 for validation.
Split 90 images for Balayong (SL) into 72 for training and 18 for validation.
Split 77 images for Bayabas (C NBG) into 61 for training and 16 for validation.
Split 78 images for Bayabas (S NBG into 62 for training and 16 for validation.
Split 89 images for Dao (C NBG) into 71 for training and 18 for validation.
Split 91 images for Dao (S NBG) into 72 for training and 19 for validation.


### Train Test Split with Resize

In [4]:
from PIL import Image, ExifTags
import os
import random
import shutil

# Define the directory containing the class folders
base_dir = r"C:\Users\Ven\Desktop\CashCF Data"
output_dir = r"C:\Users\Ven\Desktop\Cash-Counterfeat\model\data"

# Define the size to which you want to resize the images
target_size = (400, 300)

# File extensions treated as images. They are compared against the
# lower-cased file name, so ".JPG", ".Png", ".JPEG", ... all match.
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".gif")


def list_image_files(directory):
    """Return the sorted names of the image files directly inside *directory*.

    A name qualifies when it is a regular file and its lower-cased form ends
    with one of ``IMAGE_EXTENSIONS``. Sorting makes the result independent of
    the order in which the operating system lists the directory, which is
    required for a seeded shuffle to be reproducible.
    """
    return sorted(
        name
        for name in os.listdir(directory)
        if name.lower().endswith(IMAGE_EXTENSIONS)
        and os.path.isfile(os.path.join(directory, name))
    )


def resize_and_save(src, dst, size):
    """Save a copy of the image at *src*, turned upright and resized, to *dst*.

    The EXIF orientation is applied with Pillow's public
    ``ImageOps.exif_transpose`` instead of reading the private ``_getexif()``
    by hand, so mirrored orientations are handled too and images without EXIF
    data pass through unchanged. The resize goes straight to *size* without
    preserving the aspect ratio, as the original script did.
    """
    # Local import: the cell's own import line only brings in Image/ExifTags.
    from PIL import ImageOps

    with Image.open(src) as img:
        upright = ImageOps.exif_transpose(img)
        upright.resize(size).save(dst)


def split_and_resize_dataset(base_dir, output_dir, target_size, train_ratio=0.80, seed=None):
    """Split every class folder of *base_dir* into resized train/valid sets.

    Each sub-directory of *base_dir* is treated as one class. Its images are
    shuffled, the first ``int(total * train_ratio)`` are written to
    ``output_dir/train/<class>`` and the rest to ``output_dir/valid/<class>``,
    each one oriented upright and resized to *target_size*. One summary line
    is printed per class.

    NOTE: existing output folders are reused and never emptied, so running the
    split twice with a different shuffle can leave the same image in both
    ``train`` and ``valid``. Use a fresh *output_dir* (or a fixed *seed*) when
    re-running.

    Args:
        base_dir: Directory whose sub-directories are the class folders.
        output_dir: Destination root; created if missing.
        target_size: Size passed to ``Image.resize`` for every image.
        train_ratio: Fraction of each class that goes to the training set.
        seed: Optional seed for a reproducible shuffle. ``None`` keeps the
            original behaviour of shuffling with the global ``random`` state.

    Returns:
        A dict mapping each class name to ``(num_train, num_valid)``.

    Raises:
        ValueError: If *train_ratio* is outside ``[0, 1]``.
    """
    if not 0 <= train_ratio <= 1:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio!r}")

    # A private generator is only used when a seed is requested; otherwise the
    # module-level generator is used, exactly as the original script did.
    rng = random if seed is None else random.Random(seed)

    # Get the list of subdirectories (class names)
    class_names = sorted(
        d for d in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, d))
    )

    # Create necessary directories
    os.makedirs(output_dir, exist_ok=True)

    counts = {}
    for class_name in class_names:
        og_data_dir = os.path.join(base_dir, class_name)

        # Output directories for train and validation data
        train_dir = os.path.join(output_dir, "train", class_name)
        valid_dir = os.path.join(output_dir, "valid", class_name)
        os.makedirs(train_dir, exist_ok=True)
        os.makedirs(valid_dir, exist_ok=True)

        image_files = list_image_files(og_data_dir)
        rng.shuffle(image_files)

        total_images = len(image_files)
        num_train = int(total_images * train_ratio)
        num_valid = total_images - num_train

        # The shuffled list is cut once; each half goes to its own folder.
        destinations = (
            (train_dir, image_files[:num_train]),
            (valid_dir, image_files[num_train:]),
        )
        for target_dir, names in destinations:
            for name in names:
                resize_and_save(
                    os.path.join(og_data_dir, name),
                    os.path.join(target_dir, name),
                    target_size,
                )

        print(f"Split {total_images} images for {class_name} into {num_train} for training and {num_valid} for validation.")
        counts[class_name] = (num_train, num_valid)

    return counts


# In a notebook cell (or when run as a script) __name__ is "__main__", so the
# split still runs; importing this code elsewhere no longer touches the disk.
if __name__ == "__main__":
    split_and_resize_dataset(base_dir, output_dir, target_size)


Split 200 images for Balayong into 160 for training and 40 for validation.
Split 200 images for Bayabas into 160 for training and 40 for validation.
Split 200 images for Betis into 160 for training and 40 for validation.
Split 200 images for Dao into 160 for training and 40 for validation.
Split 200 images for Dita into 160 for training and 40 for validation.
Split 200 images for Guyabano into 160 for training and 40 for validation.
Split 200 images for Ilang-Ilang into 160 for training and 40 for validation.
Split 200 images for Ipil into 160 for training and 40 for validation.
Split 200 images for Kalios into 160 for training and 40 for validation.




Split 200 images for Kamagong into 160 for training and 40 for validation.
Split 200 images for Mulawin into 160 for training and 40 for validation.
Split 200 images for Narra into 160 for training and 40 for validation.
Split 200 images for Sintores into 160 for training and 40 for validation.
Split 200 images for Yakal into 160 for training and 40 for validation.


### Train Test Split with Resize (Square)

In [5]:
from PIL import Image, ExifTags
import os
import random
import shutil

# Define the directory containing the class folders
base_dir = r"C:\Users\Ven\Desktop\Leaflet Data 2023\data exp"
output_dir = r"C:\Users\Ven\Desktop\Leaflet-CNN-Sequential\model\data\Dataset tl(14 classes 224 x 224)"

# Define the size to which you want to resize the images
target_size = (224, 224)

# File extensions treated as images. They are compared against the
# lower-cased file name, so ".JPG", ".Png", ".JPEG", ... all match.
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".gif")


def list_image_files(directory):
    """Return the sorted names of the image files directly inside *directory*.

    A name qualifies when it is a regular file and its lower-cased form ends
    with one of ``IMAGE_EXTENSIONS``. Sorting makes the result independent of
    the order in which the operating system lists the directory, which is
    required for a seeded shuffle to be reproducible.
    """
    return sorted(
        name
        for name in os.listdir(directory)
        if name.lower().endswith(IMAGE_EXTENSIONS)
        and os.path.isfile(os.path.join(directory, name))
    )


def resize_and_save(src, dst, size):
    """Save a copy of the image at *src*, turned upright and resized, to *dst*.

    The EXIF orientation is applied with Pillow's public
    ``ImageOps.exif_transpose`` instead of reading the private ``_getexif()``
    by hand, so mirrored orientations are handled too and images without EXIF
    data pass through unchanged. The resize goes straight to *size* without
    preserving the aspect ratio, as the original script did.
    """
    # Local import: the cell's own import line only brings in Image/ExifTags.
    from PIL import ImageOps

    with Image.open(src) as img:
        upright = ImageOps.exif_transpose(img)
        upright.resize(size).save(dst)


def split_and_resize_dataset(base_dir, output_dir, target_size, train_ratio=0.80, seed=None):
    """Split every class folder of *base_dir* into resized train/valid sets.

    Each sub-directory of *base_dir* is treated as one class. Its images are
    shuffled, the first ``int(total * train_ratio)`` are written to
    ``output_dir/train/<class>`` and the rest to ``output_dir/valid/<class>``,
    each one oriented upright and resized to *target_size*. One summary line
    is printed per class.

    NOTE: existing output folders are reused and never emptied, so running the
    split twice with a different shuffle can leave the same image in both
    ``train`` and ``valid``. Use a fresh *output_dir* (or a fixed *seed*) when
    re-running.

    Args:
        base_dir: Directory whose sub-directories are the class folders.
        output_dir: Destination root; created if missing.
        target_size: Size passed to ``Image.resize`` for every image.
        train_ratio: Fraction of each class that goes to the training set.
        seed: Optional seed for a reproducible shuffle. ``None`` keeps the
            original behaviour of shuffling with the global ``random`` state.

    Returns:
        A dict mapping each class name to ``(num_train, num_valid)``.

    Raises:
        ValueError: If *train_ratio* is outside ``[0, 1]``.
    """
    if not 0 <= train_ratio <= 1:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio!r}")

    # A private generator is only used when a seed is requested; otherwise the
    # module-level generator is used, exactly as the original script did.
    rng = random if seed is None else random.Random(seed)

    # Get the list of subdirectories (class names)
    class_names = sorted(
        d for d in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, d))
    )

    # Create necessary directories
    os.makedirs(output_dir, exist_ok=True)

    counts = {}
    for class_name in class_names:
        og_data_dir = os.path.join(base_dir, class_name)

        # Output directories for train and validation data
        train_dir = os.path.join(output_dir, "train", class_name)
        valid_dir = os.path.join(output_dir, "valid", class_name)
        os.makedirs(train_dir, exist_ok=True)
        os.makedirs(valid_dir, exist_ok=True)

        image_files = list_image_files(og_data_dir)
        rng.shuffle(image_files)

        total_images = len(image_files)
        num_train = int(total_images * train_ratio)
        num_valid = total_images - num_train

        # The shuffled list is cut once; each half goes to its own folder.
        destinations = (
            (train_dir, image_files[:num_train]),
            (valid_dir, image_files[num_train:]),
        )
        for target_dir, names in destinations:
            for name in names:
                resize_and_save(
                    os.path.join(og_data_dir, name),
                    os.path.join(target_dir, name),
                    target_size,
                )

        print(f"Split {total_images} images for {class_name} into {num_train} for training and {num_valid} for validation.")
        counts[class_name] = (num_train, num_valid)

    return counts


# In a notebook cell (or when run as a script) __name__ is "__main__", so the
# split still runs; importing this code elsewhere no longer touches the disk.
if __name__ == "__main__":
    split_and_resize_dataset(base_dir, output_dir, target_size)


Split 200 images for Balayong into 160 for training and 40 for validation.
Split 200 images for Bayabas into 160 for training and 40 for validation.
Split 200 images for Betis into 160 for training and 40 for validation.
Split 200 images for Dao into 160 for training and 40 for validation.
Split 200 images for Dita into 160 for training and 40 for validation.
Split 200 images for Guyabano into 160 for training and 40 for validation.
Split 200 images for Ilang-Ilang into 160 for training and 40 for validation.
Split 200 images for Ipil into 160 for training and 40 for validation.
Split 200 images for Kalios into 160 for training and 40 for validation.
Split 200 images for Kamagong into 160 for training and 40 for validation.
Split 200 images for Mulawin into 160 for training and 40 for validation.
Split 200 images for Narra into 160 for training and 40 for validation.
Split 200 images for Sintores into 160 for training and 40 for validation.
Split 200 images for Yakal into 160 for train