In [1]:
import os
import cv2
import random

SRC_DIR = r"c:\Users\saike\Downloads\medimg_py397\final_final"
OUT_DIR = r"c:\Users\saike\Downloads\medimg_py397\final_final_resized"
IMG_SIZE = (128, 128)        # width, height
TARGET_PER_CLASS = 2000
RANDOM_SEED = 42
VALID_EXT = {'.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff'}

os.makedirs(OUT_DIR, exist_ok=True)
random.seed(RANDOM_SEED)

def is_image_file(name):
    return os.path.splitext(name.lower())[1] in VALID_EXT

for cls in sorted(os.listdir(SRC_DIR)):
    src_cls = os.path.join(SRC_DIR, cls)
    if not os.path.isdir(src_cls):
        continue
    out_cls = os.path.join(OUT_DIR, cls)
    os.makedirs(out_cls, exist_ok=True)

    # collect image files
    files = [f for f in os.listdir(src_cls) if is_image_file(f) and os.path.isfile(os.path.join(src_cls, f))]
    if len(files) == 0:
        print(f"[WARN] no images found in {src_cls}, skipping")
        continue

    # sample files
    if len(files) < TARGET_PER_CLASS:
        print(f"[WARN] class '{cls}' has only {len(files)} images (< {TARGET_PER_CLASS}). All will be used.")
        chosen = sorted(files)
    else:
        random.shuffle(files)
        chosen = files[:TARGET_PER_CLASS]

    print(f"[INFO] class='{cls}' src_count={len(files)} chosen={len(chosen)} -> saving to {out_cls}")

    # process and write
    for i, fname in enumerate(sorted(chosen)):
        src_path = os.path.join(src_cls, fname)
        img = cv2.imread(src_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            print(f"  [SKIP] cannot read: {src_path}")
            continue
        resized = cv2.resize(img, IMG_SIZE, interpolation=cv2.INTER_AREA)
        # create a safe unique name to avoid collisions
        out_name = f"{i:05d}_{fname}"
        out_path = os.path.join(out_cls, out_name)
        cv2.imwrite(out_path, resized)

print("Done. Reduced dataset saved to:", OUT_DIR)

[INFO] class='Mild Impairment' src_count=2560 chosen=2000 -> saving to c:\Users\saike\Downloads\medimg_py397\final_final_resized\Mild Impairment
[INFO] class='Moderate Impairment' src_count=2560 chosen=2000 -> saving to c:\Users\saike\Downloads\medimg_py397\final_final_resized\Moderate Impairment
[INFO] class='No Impairment' src_count=2560 chosen=2000 -> saving to c:\Users\saike\Downloads\medimg_py397\final_final_resized\No Impairment
[INFO] class='Very Mild Impairment' src_count=2560 chosen=2000 -> saving to c:\Users\saike\Downloads\medimg_py397\final_final_resized\Very Mild Impairment
Done. Reduced dataset saved to: c:\Users\saike\Downloads\medimg_py397\final_final_resized


In [5]:
import os
import cv2
import shutil
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# --------------------------------
# 1. PATHS
# --------------------------------
SOURCE_DIR = r"C:\Users\saike\Downloads\medimg_py397\OASIS Dataset\input\Moderate Dementia"
OUTPUT_DIR = r"C:\Users\saike\Downloads\medimg_py397\balanced_dataset\Moderate Dementia"
TARGET_COUNT = 3000

os.makedirs(OUTPUT_DIR, exist_ok=True)

# --------------------------------
# 2. AUGMENTATION CONFIG
# --------------------------------
datagen = ImageDataGenerator(
    rotation_range=25,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    brightness_range=(0.8, 1.2),
    fill_mode='nearest'
)

# --------------------------------
# 3. LOAD IMAGES
# --------------------------------
images = [f for f in os.listdir(SOURCE_DIR)
          if f.lower().endswith(('.jpg', '.png', '.jpeg'))]

print("Original images:", len(images))

# Copy originals
for img_name in images:
    shutil.copy(
        os.path.join(SOURCE_DIR, img_name),
        os.path.join(OUTPUT_DIR, img_name)
    )

# --------------------------------
# 4. AUGMENT SAFELY (NO save_to_dir)
# --------------------------------
needed = TARGET_COUNT - len(images)
aug_index = 0

for img_name in images:
    if needed <= 0:
        break

    img_path = os.path.join(SOURCE_DIR, img_name)
    img = cv2.imread(img_path)

    if img is None:
        continue

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.expand_dims(img, axis=0)

    aug_iter = datagen.flow(img, batch_size=1)

    for _ in range(5):
        if needed <= 0:
            break

        augmented = next(aug_iter)[0]   # get augmented image
        augmented = augmented.astype(np.uint8)

        save_name = f"aug_{aug_index}.jpg"
        save_path = os.path.join(OUTPUT_DIR, save_name)

        cv2.imwrite(save_path, cv2.cvtColor(augmented, cv2.COLOR_RGB2BGR))

        aug_index += 1
        needed -= 1

print("ðŸŽ‰ DONE! Moderate Dementia class balanced.")
print("Final image count:", len(os.listdir(OUTPUT_DIR)))


Original images: 488
ðŸŽ‰ DONE! Moderate Dementia class balanced.
Final image count: 2928


In [1]:
import os
import cv2
import random

SRC_DIR = r"C:\Users\saike\Downloads\medimg_py397\OASIS Dataset\input"
OUT_DIR = r"c:\Users\saike\Downloads\medimg_py397\OASIS Dataset Resized"
IMG_SIZE = (128, 128)        # width, height
TARGET_PER_CLASS = 2000
RANDOM_SEED = 42
VALID_EXT = {'.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff'}

os.makedirs(OUT_DIR, exist_ok=True)
random.seed(RANDOM_SEED)

def is_image_file(name):
    return os.path.splitext(name.lower())[1] in VALID_EXT

for cls in sorted(os.listdir(SRC_DIR)):
    src_cls = os.path.join(SRC_DIR, cls)
    if not os.path.isdir(src_cls):
        continue
    out_cls = os.path.join(OUT_DIR, cls)
    os.makedirs(out_cls, exist_ok=True)

    # collect image files
    files = [f for f in os.listdir(src_cls) if is_image_file(f) and os.path.isfile(os.path.join(src_cls, f))]
    if len(files) == 0:
        print(f"[WARN] no images found in {src_cls}, skipping")
        continue

    # sample files
    if len(files) < TARGET_PER_CLASS:
        print(f"[WARN] class '{cls}' has only {len(files)} images (< {TARGET_PER_CLASS}). All will be used.")
        chosen = sorted(files)
    else:
        random.shuffle(files)
        chosen = files[:TARGET_PER_CLASS]

    print(f"[INFO] class='{cls}' src_count={len(files)} chosen={len(chosen)} -> saving to {out_cls}")

    # process and write
    for i, fname in enumerate(sorted(chosen)):
        src_path = os.path.join(src_cls, fname)
        img = cv2.imread(src_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            print(f"  [SKIP] cannot read: {src_path}")
            continue
        resized = cv2.resize(img, IMG_SIZE, interpolation=cv2.INTER_AREA)
        # create a safe unique name to avoid collisions
        out_name = f"{i:05d}_{fname}"
        out_path = os.path.join(out_cls, out_name)
        cv2.imwrite(out_path, resized)

print("Done. Reduced dataset saved to:", OUT_DIR)

[INFO] class='Mild Dementia' src_count=3000 chosen=2000 -> saving to c:\Users\saike\Downloads\medimg_py397\OASIS Dataset Resized\Mild Dementia
[INFO] class='Moderate Dementia' src_count=2928 chosen=2000 -> saving to c:\Users\saike\Downloads\medimg_py397\OASIS Dataset Resized\Moderate Dementia
[INFO] class='Non Demented' src_count=3000 chosen=2000 -> saving to c:\Users\saike\Downloads\medimg_py397\OASIS Dataset Resized\Non Demented
[INFO] class='Very mild Dementia' src_count=3000 chosen=2000 -> saving to c:\Users\saike\Downloads\medimg_py397\OASIS Dataset Resized\Very mild Dementia
Done. Reduced dataset saved to: c:\Users\saike\Downloads\medimg_py397\OASIS Dataset Resized
