In [None]:
# File to extract UoA-DR into directories in the ./data folder - categorised into classes for model training. 
# UoA-DR: https://figshare.com/s/5011cb600c8bdbc941f1?file=10741045 


In [2]:
import os
image_exts = ['jpeg','jpg', 'bmp', 'png']

# Expect downloaded images from UoA-DR 
data_dir = 'data/UoA-DR/' 

#  X and Y pairs outlined in https://figshare.com/s/5011cb600c8bdbc941f1?file=10910012
classes_dict = {  
    "NPDR": {*range(1, 82), *range(83, 95), 130, *range(132, 144), *range(168, 170), *range(171, 175), 179, 193},
    "PDR": {82, *range(95, 101), 131, 167, *range(176, 179), *range(182, 193), *range(194, 201)},
    "Healthy": {*range(101, 130), *range(144, 167), 170, 175, *range(180, 182)}
}

# Function to check if a string contains only digits and ends with '.jpg'
def is_retinal_only(file_name):
    return file_name[:-4].isdigit() and file_name.endswith(".jpg")

# Function to find the class of an image given its full filename
def find_class(file_name):
    image_number = file_name[:-4]
    
    for image_class, value_set in classes_dict.items():
        if int(image_number) in value_set:
            print(f"The associated key for {image_number} is: {image_class}")
            break
    else:
        print(f"No associated key found for {image_number}")
    
    return image_class
    
# Create the new folder if it doesn't exist
for image_class in classes_dict.keys():
    new_folder = os.path.join(data_dir, image_class)
    os.makedirs(new_folder, exist_ok=True)

# Modify the data_dir so its is in the tree struction of data_dir -> image_class -> image
for image_folder in os.listdir(data_dir): 
    for image in os.listdir(os.path.join(data_dir, image_folder)):
        if is_retinal_only(image):
            image_class = find_class(image)

            # Move the image to the new folder
            original_image_path = os.path.join(data_dir, image_folder, image)
            new_image_path = os.path.join(data_dir, image_class, image)
            os.rename(original_image_path, new_image_path)
            
            print("Original Path:", original_image_path)
            print("New Path:", new_image_path)

The associated key for 147 is: Healthy
Original Path: data/UoA-DR/Healthy/147.jpg
New Path: data/UoA-DR/Healthy/147.jpg
The associated key for 127 is: Healthy
Original Path: data/UoA-DR/Healthy/127.jpg
New Path: data/UoA-DR/Healthy/127.jpg
The associated key for 102 is: Healthy
Original Path: data/UoA-DR/Healthy/102.jpg
New Path: data/UoA-DR/Healthy/102.jpg
The associated key for 110 is: Healthy
Original Path: data/UoA-DR/Healthy/110.jpg
New Path: data/UoA-DR/Healthy/110.jpg
The associated key for 166 is: Healthy
Original Path: data/UoA-DR/Healthy/166.jpg
New Path: data/UoA-DR/Healthy/166.jpg
The associated key for 108 is: Healthy
Original Path: data/UoA-DR/Healthy/108.jpg
New Path: data/UoA-DR/Healthy/108.jpg
The associated key for 163 is: Healthy
Original Path: data/UoA-DR/Healthy/163.jpg
New Path: data/UoA-DR/Healthy/163.jpg
The associated key for 170 is: Healthy
Original Path: data/UoA-DR/Healthy/170.jpg
New Path: data/UoA-DR/Healthy/170.jpg
The associated key for 148 is: Healthy
O