In [1]:
# ✅ Step 1: Mount Google Drive
# Everything below reads from and writes to paths under this mount point.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

import os
import shutil
import pandas as pd
from google.colab import files
from tqdm import tqdm  # Progress bar
from PIL import Image

# ✅ Step 2: Upload CSV File
# files.upload() opens a browser dialog and returns a dict keyed by the
# uploaded file names; it is empty when the dialog is cancelled.
uploaded = files.upload()  # Upload CSV manually
if not uploaded:
    raise ValueError("No file was uploaded - re-run this cell and select the metadata CSV.")
csv_filename = next(iter(uploaded))  # Name of the first uploaded file

# ✅ Step 3: Load the CSV File
df = pd.read_csv(csv_filename)

# Fail fast: the sorting step below reads these two columns, so a wrong CSV
# should be rejected here instead of part-way through a long copy loop.
missing_columns = {"image_id", "dx"} - set(df.columns)
if missing_columns:
    raise KeyError(f"{csv_filename} is missing required column(s): {sorted(missing_columns)}")

print(df.head())  # Check the first few rows

# ✅ Step 4: Define Paths
# Paths are built with os.path.join so the trailing slash on `drive_path`
# does not produce doubled separators (e.g. ".../DATABase//melanoma").
drive_path = "/content/drive/MyDrive/DATABase/"  # Root directory in Google Drive
image_folder = os.path.join(drive_path, "HAM10000_images")  # Folder where original images are stored
mel_folder = os.path.join(drive_path, "melanoma")  # Destination for melanoma images
nev_folder = os.path.join(drive_path, "nevus")  # Destination for nevus images

# Stop early if the source folder is not there (e.g. Drive is not mounted or
# the folder name differs); otherwise every image would be reported missing.
if not os.path.isdir(image_folder):
    raise FileNotFoundError(f"Source image folder not found: {image_folder}")

# ✅ Step 5: Create Folders If They Don't Exist
for folder in (mel_folder, nev_folder):
    os.makedirs(folder, exist_ok=True)

# ✅ Step 6: Copy Images to Their Respective Folders
# Images are copied (not moved), so the originals in `image_folder` stay intact.
# Rows whose `dx` label is neither "mel" nor "nv" are ignored.
label_to_folder = {"mel": mel_folder, "nv": nev_folder}
copied = skipped = missing = 0

# Only two columns are needed, so iterate over them directly instead of
# building a full Series per row with iterrows().
for image_stem, label in tqdm(
    zip(df['image_id'], df['dx']), total=len(df), desc="Processing Images"
):
    dest_folder = label_to_folder.get(label)
    if dest_folder is None:
        continue  # Skip other conditions

    image_id = image_stem + ".jpg"  # Assuming images are in .jpg format
    source_path = os.path.join(image_folder, image_id)  # Path to the original image
    dest_path = os.path.join(dest_folder, image_id)  # Path to copy the image to

    if not os.path.exists(source_path):
        missing += 1
        # tqdm.write prints above the progress bar instead of breaking it up.
        tqdm.write(f"❌ Warning: Image {image_id} not found in {image_folder}")
        continue

    # Make re-runs cheap: a destination file of the same size is treated as
    # already copied. A size mismatch (e.g. an interrupted copy) is redone.
    if (
        os.path.exists(dest_path)
        and os.path.getsize(dest_path) == os.path.getsize(source_path)
    ):
        skipped += 1
        continue

    shutil.copy(source_path, dest_path)
    copied += 1

print("✅ Sorting Complete! Check Google Drive for sorted images.")
print(f"Copied: {copied} | Already present: {skipped} | Missing: {missing}")


Mounted at /content/drive


Saving metadata_ground_truth.csv to metadata_ground_truth.csv
     lesion_id      image_id   dx dx_type   age     sex localization  \
0  HAM_0005678  ISIC_0031023  mel   histo  60.0    male        chest   
1  HAM_0005191  ISIC_0031177  mel   histo  40.0  female         back   
2  HAM_0007310  ISIC_0026120  mel   histo  55.0    male         back   
3  HAM_0004476  ISIC_0030417  mel   histo  70.0    male         face   
4  HAM_0000876  ISIC_0026531  mel   histo  55.0    male      abdomen   

        dataset  benign_malignant  APC  ...  PIF  PLF  PLR  PRL  PRLC  PV  \
0  vidir_modern                 1    0  ...    0    0    0    1     0   0   
1     rosendahl                 1    0  ...    0    0    0    0     0   0   
2  vidir_modern                 1    0  ...    0    0    0    0     0   1   
3  vidir_modern                 1    0  ...    0    0    0    0     0   1   
4  vidir_modern                 1    0  ...    0    0    0    0     0   1   

   SPC  TRBL  WLSA  annotator  
0    0    

Processing Images: 100%|██████████| 6498/6498 [41:54<00:00,  2.58it/s]

✅ Sorting Complete! Check Google Drive for sorted images.



