In [29]:
import os
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import torch
import torchvision.transforms as transforms
import shutil
import torch.nn as nn
import torch.nn.functional as F
import time
import copy

In [5]:
def load_image_dataset(root_dir):
    data = []
    labels = []

    for label in os.listdir(root_dir):
        class_dir = os.path.join(root_dir, label)
        if os.path.isdir(class_dir):
            for image_name in os.listdir(class_dir):
                image_path = os.path.join(class_dir, image_name)
                try:
                    data.append(image_path)
                    labels.append(label)
                except Exception as e:
                    print(f"Error loading image {image_path}: {e}")

    return pd.DataFrame({'image': data, 'label': labels})

In [6]:
from google.colab import drive
drive.mount('/content/drive')
dataset_path = '/content/drive/My Drive/046211/Project'
df = load_image_dataset(dataset_path)
print(df)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
                                                   image  \
0      /content/drive/My Drive/046211/Project/9. Tine...   
1      /content/drive/My Drive/046211/Project/9. Tine...   
2      /content/drive/My Drive/046211/Project/9. Tine...   
3      /content/drive/My Drive/046211/Project/9. Tine...   
4      /content/drive/My Drive/046211/Project/9. Tine...   
...                                                  ...   
27148  /content/drive/My Drive/046211/Project/1. Ecze...   
27149  /content/drive/My Drive/046211/Project/1. Ecze...   
27150  /content/drive/My Drive/046211/Project/1. Ecze...   
27151  /content/drive/My Drive/046211/Project/1. Ecze...   
27152  /content/drive/My Drive/046211/Project/1. Ecze...   

                                                   label  
0      9. Tinea Ringworm Candidiasis and other Fungal...  
1      9. Tinea Ringworm Candidi

In [7]:
print(df["label"].value_counts())

label
5. Melanocytic Nevi (NV) - 7970                                     7970
4. Basal Cell Carcinoma (BCC) 3323                                  3323
2. Melanoma 15.75k                                                  3140
10. Warts Molluscum and other Viral Infections - 2103               2103
6. Benign Keratosis-like Lesions (BKL) 2624                         2079
7. Psoriasis pictures Lichen Planus and related diseases - 2k       2055
8. Seborrheic Keratoses and other Benign Tumors - 1.8k              1847
9. Tinea Ringworm Candidiasis and other Fungal Infections - 1.7k    1702
1. Eczema 1677                                                      1677
3. Atopic Dermatitis - 1.25k                                        1257
Name: count, dtype: int64


In [20]:
train_images, rest_images = train_test_split(df, test_size=0.3, random_state=42)
val_images, test_images = train_test_split(rest_images, test_size=2/3, random_state=42)

In [None]:
# Create directories
base_path = '/content/drive/My Drive/046211/Project'
for split in ['train', 'val', 'test']:
    os.makedirs(os.path.join(base_path, split), exist_ok=True)

In [34]:
# Function to move images and update DataFrame paths
def move_and_update_paths(df, split_name):
    new_paths = []
    for _, row in df.iterrows():
        src = row['image']
        # Construct new path
        new_path = os.path.join(base_path, split_name, os.path.basename(src))
        # Move image
        shutil.copy(src, new_path)
        new_paths.append(new_path)
    df['image'] = new_paths

# Move images to respective directories
#move_and_update_paths(train_images, 'train')
move_and_update_paths(val_images, 'val')
move_and_update_paths(test_images, 'test')

In [35]:
# Save DataFrames to Google Drive
#train_images.to_csv(os.path.join(base_path, 'train_images.csv'), index=False)
val_images.to_csv(os.path.join(base_path, 'val_images.csv'), index=False)
test_images.to_csv(os.path.join(base_path, 'test_images.csv'), index=False)