In [2]:
# File to extract UoA-DR into directories in the ./data folder - categorised into classes for model training. 
# Reorganises data_dir so that it has the tree structure data_dir -> image_class -> image
# UoA-DR: https://figshare.com/s/5011cb600c8bdbc941f1?file=10741045 

import os

# Root folder that is expected to hold the images downloaded from UoA-DR
data_dir = 'data/UoA-DR/'

# Image numbers per class, following the X and Y pairs outlined in
# https://figshare.com/s/5011cb600c8bdbc941f1?file=10910012
# range() excludes its upper bound, e.g. range(1, 82) covers numbers 1..81;
# single image numbers are passed as one-element lists.
classes_dict = {
    "NPDR": set().union(
        range(1, 82), range(83, 95), [130], range(132, 144),
        range(168, 170), range(171, 175), [179], [193],
    ),
    "PDR": set().union(
        [82], range(95, 101), [131], [167],
        range(176, 179), range(182, 193), range(194, 201),
    ),
    "Healthy": set().union(
        range(101, 130), range(144, 167), [170], [175], range(180, 182),
    ),
}

In [3]:
# List of image file extensions (without the leading dot).
# NOTE(review): not referenced by any other code visible in this notebook.
image_exts = [
    'jpeg',
    'jpg',
    'bmp',
    'png',
]

# Function to check whether a file name is purely numeric followed by '.jpg'
def is_retinal_only(file_name):
    """Return True when everything before the last four characters of
    file_name consists of digits and file_name ends with '.jpg'."""
    # Drop the last four characters (the length of '.jpg') to get the stem.
    stem = file_name[:-4]
    if not stem.isdigit():
        return False
    return file_name.endswith(".jpg")

# Function to find the class of an image given its full filename
def find_class(file_name):
    """Look up the class whose number set in classes_dict contains the image.

    Parameters:
        file_name: image file name; the last four characters are treated as
            the extension (e.g. '.jpg') and the rest as the image number.

    Returns:
        The matching key of classes_dict, or None when the image number is
        not listed under any class. Returning None here avoids handing back
        whichever class happened to be iterated last for an unlisted number.

    Raises:
        ValueError: if the part before the extension is not an integer.
    """
    image_number = file_name[:-4]
    number = int(image_number)  # convert once instead of once per class

    for image_class, value_set in classes_dict.items():
        if number in value_set:
            print(f"The associated key for {image_number} is: {image_class}")
            return image_class

    print(f"No associated key found for {image_number}")
    return None
    
# Create one folder per class under data_dir (left untouched if it already exists)
for image_class in classes_dict.keys():
    new_folder = os.path.join(data_dir, image_class)
    os.makedirs(new_folder, exist_ok=True)

# Modify data_dir so that it has the tree structure data_dir -> image_class -> image
for image_folder in os.listdir(data_dir):
    folder_path = os.path.join(data_dir, image_folder)

    # data_dir may also contain plain files (the labels CSV is written there);
    # os.listdir() on a file raises NotADirectoryError, so skip them.
    if not os.path.isdir(folder_path):
        continue

    for image in os.listdir(folder_path):
        if not is_retinal_only(image):
            continue

        image_class = find_class(image)

        # Only move the image when its number really is listed under the
        # returned class, so an unlisted number is never filed under a label
        # it does not belong to. is_retinal_only() guarantees a numeric stem.
        if image_class is None or int(image[:-4]) not in classes_dict[image_class]:
            print("Skipping image with no known class:", os.path.join(folder_path, image))
            continue

        original_image_path = os.path.join(data_dir, image_folder, image)
        new_image_path = os.path.join(data_dir, image_class, image)

        # Already in its class folder (e.g. when this cell is re-run):
        # nothing to move and nothing to report.
        if original_image_path == new_image_path:
            continue

        # Move the image to the new folder
        os.rename(original_image_path, new_image_path)

        print("Original Path:", original_image_path)
        print("New Path:", new_image_path)

KeyboardInterrupt: 

In [6]:
import csv

# Columns that hold the one-hot class flags, in the order they appear in the CSV.
# The header row and every data row are both built from this list so they
# cannot drift apart.
label_columns = ["Healthy", "NPDR", "PDR"]

# Create a list to store rows of data
data_rows = []

# Iterate through the dictionary and create one row per image number
for class_name, values in classes_dict.items():
    # One-hot encoding for this class: 1 in its own column, 0 elsewhere
    one_hot_encoding = {column: int(column == class_name) for column in label_columns}

    # Sets have no guaranteed iteration order; sort the image numbers so the
    # generated file is identical on every run.
    for value in sorted(values):
        data_rows.append(
            [str(value), class_name, *(one_hot_encoding[column] for column in label_columns)]
        )

# Write the data to a CSV file inside data_dir
csv_filename = "UoA-DR-labels.csv"
csv_file_path = os.path.join(data_dir, csv_filename)

# newline="" lets the csv module control line endings itself
with open(csv_file_path, "w", newline="") as csvfile:
    csv_writer = csv.writer(csvfile)

    # Write the header row
    csv_writer.writerow(["Image Number", "Class", *label_columns])

    # Write the data rows
    csv_writer.writerows(data_rows)

print("CSV file created:", csv_file_path)


CSV file created: data/UoA-DR/UoA-DR-labels.csv
