### Importing libraries

In [1]:
import cv2
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score,f1_score
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from skimage.morphology import skeletonize
from skimage import feature

  "class": algorithms.Blowfish,


### Folder Paths

In [2]:
# Train
input_folder_train="D:\DR_final\\a. Training Set"
output_optic_disc_train = "D:\DR_final\Optic Disc"
output_folder_ma_train="D:\\DR_final\\MA"
output_folder_bv_train="D:\\DR_final\\Blood Vessels"
output_folder_ex_train="D:\DR_final\Exudates"

In [3]:
# Test
input_folder_test="D:\DR_final\\b. Testing Set"
output_optic_disc_test = "D:\DR_final\Optic_disc_test"
output_folder_ma_test="D:\\DR_final\\MA test"
output_folder_bv_test="D:\DR_final\Blood Vessels test"
output_folder_ex_test="D:\\DR_final\\Exudates test"

### Preprocessing+ Segmentation

### Train

#### Optic Disc

In [4]:
def optic_disc(image):
    ratio  = min([1152/image.shape[0], 1500/image.shape[1]])
    img = cv2.resize(image, (int(image.shape[1] * ratio), int(image.shape[0] * ratio)), interpolation=cv2.INTER_CUBIC)
    image_r = img[:, :, 2]

    threshold_value = 245
    _, thresh = cv2.threshold(image_r, threshold_value, 255, cv2.THRESH_BINARY)

    kernel_opening = np.ones((5, 5), np.uint8)
    opened_image = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel_opening, iterations=2)

    kernel_closing = np.ones((10, 10), np.uint8)
    closed_optic_disc = cv2.morphologyEx(opened_image, cv2.MORPH_CLOSE, kernel_closing, iterations=2)

    contours, _ = cv2.findContours(closed_optic_disc, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)

    if len(contours) > 0:
        largest_contour = contours[0]
        optic_disc_mask = np.zeros_like(image_r)
        cv2.drawContours(optic_disc_mask, [largest_contour], -1, 255, thickness=cv2.FILLED)
        optic_disc_image = cv2.bitwise_and(image_r, image_r, mask=optic_disc_mask)
        smooth_optic_disc = cv2.GaussianBlur(optic_disc_image, (0, 0), sigmaX=5, sigmaY=5)
        kernel = np.ones((10, 10), np.uint8)
        dilated_optic_disc = cv2.dilate(smooth_optic_disc, kernel, iterations=1)
        _, binary_dilated_optic_disc = cv2.threshold(dilated_optic_disc, 1, 255, cv2.THRESH_BINARY)
        contours, _ = cv2.findContours(binary_dilated_optic_disc, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        filled_optic_disc = dilated_optic_disc.copy()
        cv2.drawContours(filled_optic_disc, contours, -1, 255, thickness=cv2.FILLED)
        return filled_optic_disc

In [5]:
for filename in os.listdir(input_folder_train):
    if filename.endswith('.jpg') or filename.endswith('.png'):
        image = cv2.imread(os.path.join(input_folder_train, filename))
        if image is None:
            continue  

        od_image = optic_disc(image)
        if od_image is None:
            continue  

        output_path = os.path.join(output_optic_disc_train, filename)
        cv2.imwrite(output_path, od_image)

#### Microaneurysm

In [6]:
def adjust_gamma(image, gamma=1.0):
   table = np.array([((i / 255.0) ** gamma) * 255
   for i in np.arange(0, 256)]).astype("uint8")
   return cv2.LUT(image, table)

def extract_ma(image):
    r,g,b=cv2.split(image)
    comp=255-g
    clahe = cv2.createCLAHE(clipLimit=5.0, tileGridSize=(8,8))
    histe=clahe.apply(comp)
    adjustImage = adjust_gamma(histe,gamma=3)
    comp = 255-adjustImage
    J =  adjust_gamma(comp,gamma=4)
    J = 255-J
    J = adjust_gamma(J,gamma=4)
    
    K=np.ones((11,11),np.float32)
    L = cv2.filter2D(J,-1,K)
    
    ret3,thresh2 = cv2.threshold(L,125,255,cv2.THRESH_BINARY|cv2.THRESH_OTSU)
    kernel2=np.ones((9,9),np.uint8)
    tophat = cv2.morphologyEx(thresh2, cv2.MORPH_TOPHAT, kernel2)
    kernel3=np.ones((7,7),np.uint8)
    opening = cv2.morphologyEx(tophat, cv2.MORPH_OPEN, kernel3)
    return opening

In [7]:
for filename in os.listdir(input_folder_train):
    if filename.endswith('.jpg') or filename.endswith('.png'):
        image = cv2.imread(os.path.join(input_folder_train, filename))
        ma_image = extract_ma(image)
        output_path = os.path.join(output_folder_ma_train, filename)
        cv2.imwrite(output_path, ma_image)

#### Blood Vessels

In [8]:
def blood_vessel(image):
    ratio = min([1152 / image.shape[0], 1500 / image.shape[1]])
    resized_image = cv2.resize(image, (int(image.shape[1] * ratio), int(image.shape[0] * ratio)), interpolation=cv2.INTER_CUBIC)
    gray_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)

    b, g, r = cv2.split(resized_image)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced_green_channel = clahe.apply(g)
    
    img_medf = cv2.medianBlur(enhanced_green_channel, 131)
    img_sub = cv2.subtract(img_medf, enhanced_green_channel)
    img_subf = cv2.blur(img_sub, (7, 7))
    ret, img_darkf = cv2.threshold(img_subf, 12, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
    img_darkl = cv2.morphologyEx(img_darkf, cv2.MORPH_OPEN, kernel)

    img_medf1 = cv2.medianBlur(enhanced_green_channel, 131)
    img_sub1 = cv2.subtract(img_medf1, enhanced_green_channel)
    img_subf1 = cv2.blur(img_sub1, (7, 7))
    ret, img_darkf1 = cv2.threshold(img_subf1, 12, 255, cv2.THRESH_BINARY)
    img_darkl1 = cv2.morphologyEx(img_darkf1, cv2.MORPH_OPEN, kernel)

    img_both = cv2.bitwise_or(img_darkl, img_darkl1)

    result = cv2.resize(img_both, (enhanced_green_channel.shape[1], enhanced_green_channel.shape[0]), interpolation=cv2.INTER_CUBIC)
    
    return result

In [9]:
for filename in os.listdir(input_folder_train):
    if filename.endswith('.jpg') or filename.endswith('.png'):
        image = cv2.imread(os.path.join(input_folder_train, filename))
        bv_image = blood_vessel(image)
        output_path = os.path.join(output_folder_bv_train, filename)
        cv2.imwrite(output_path, bv_image)

#### Exudates

In [4]:
def exudate_extraction(image):
    image=cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
   
    total_x_position = 0
    total_white_pixels = 0
    total_y_position = 0
    sum_values=0
    v,k=image.shape
    for y in range(len(image)):
        for x in range(len(image)):
            sum_values+=image[y][x]
    avg=sum_values/(v*k)
    
    threshh = (2.3547*avg) + 10.292
    _, binary_image = cv2.threshold(image, threshh, 255, cv2.THRESH_BINARY) #148

    for y in range(len(binary_image)):
        for x in range(len(binary_image)):
            if binary_image[y][x] == 255:
                total_x_position += x
                total_y_position += y
                total_white_pixels += 1
                
     
    if total_white_pixels == 0:
        average_x_position = 0
        average_y_position=0
    else:
        average_x_position = total_x_position / total_white_pixels
        average_y_position = total_y_position / total_white_pixels
    if average_y_position<1000:
        for j in range(binary_image.shape[0]):
            for i1 in range(binary_image.shape[1]):
                if j<(average_y_position+300):
                    binary_image[j,i1]=0
    if average_x_position>2200 or average_x_position==0:
        for j in range(binary_image.shape[0]):
            for i1 in range(binary_image.shape[1]):
                if i1>(average_x_position-240):
                    binary_image[j,i1]=0
    else:
        for j in range(binary_image.shape[0]):
            for i1 in range(binary_image.shape[1]):
                if i1<(average_x_position+240): 
                    binary_image[j,i1]=0

    return binary_image

In [None]:
for filename in os.listdir(input_folder_train):
    if filename.endswith('.jpg') or filename.endswith('.png'):
        image = cv2.imread(os.path.join(input_folder_train, filename))
        ex_image = exudate_extraction(image)
        output_path = os.path.join(output_folder_ex_train, filename)
        cv2.imwrite(output_path, ex_image)

### Test

#### Optic Disc

In [4]:
def optic_disc(image):
    ratio  = min([1152/image.shape[0], 1500/image.shape[1]])
    img = cv2.resize(image, (int(image.shape[1] * ratio), int(image.shape[0] * ratio)), interpolation=cv2.INTER_CUBIC)
    image_r = img[:, :, 2]

    threshold_value = 245
    _, thresh = cv2.threshold(image_r, threshold_value, 255, cv2.THRESH_BINARY)

    kernel_opening = np.ones((5, 5), np.uint8)
    opened_image = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel_opening, iterations=2)

    kernel_closing = np.ones((10, 10), np.uint8)
    closed_optic_disc = cv2.morphologyEx(opened_image, cv2.MORPH_CLOSE, kernel_closing, iterations=2)

    contours, _ = cv2.findContours(closed_optic_disc, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)

    if len(contours) > 0:
        largest_contour = contours[0]
        optic_disc_mask = np.zeros_like(image_r)
        cv2.drawContours(optic_disc_mask, [largest_contour], -1, 255, thickness=cv2.FILLED)
        optic_disc_image = cv2.bitwise_and(image_r, image_r, mask=optic_disc_mask)
        smooth_optic_disc = cv2.GaussianBlur(optic_disc_image, (0, 0), sigmaX=5, sigmaY=5)
        kernel = np.ones((10, 10), np.uint8)
        dilated_optic_disc = cv2.dilate(smooth_optic_disc, kernel, iterations=1)
        _, binary_dilated_optic_disc = cv2.threshold(dilated_optic_disc, 1, 255, cv2.THRESH_BINARY)
        contours, _ = cv2.findContours(binary_dilated_optic_disc, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        filled_optic_disc = dilated_optic_disc.copy()
        cv2.drawContours(filled_optic_disc, contours, -1, 255, thickness=cv2.FILLED)
        return filled_optic_disc

In [5]:
for filename in os.listdir(input_folder_test):
    if filename.endswith('.jpg') or filename.endswith('.png'):
        image = cv2.imread(os.path.join(input_folder_test, filename))
        if image is None:
            continue  

        od_image = optic_disc(image)
        if od_image is None:
            continue 

        output_path = os.path.join(output_optic_disc_test, filename)
        cv2.imwrite(output_path, od_image)

#### Microaneurysm

In [6]:
def adjust_gamma(image, gamma=1.0):
   table = np.array([((i / 255.0) ** gamma) * 255
   for i in np.arange(0, 256)]).astype("uint8")
   return cv2.LUT(image, table)

def extract_ma(image):
    r,g,b=cv2.split(image)
    comp=255-g
    clahe = cv2.createCLAHE(clipLimit=5.0, tileGridSize=(8,8))
    histe=clahe.apply(comp)
    adjustImage = adjust_gamma(histe,gamma=3)
    comp = 255-adjustImage
    J =  adjust_gamma(comp,gamma=4)
    J = 255-J
    J = adjust_gamma(J,gamma=4)
    
    K=np.ones((11,11),np.float32)
    L = cv2.filter2D(J,-1,K)
    
    ret3,thresh2 = cv2.threshold(L,125,255,cv2.THRESH_BINARY|cv2.THRESH_OTSU)
    kernel2=np.ones((9,9),np.uint8)
    tophat = cv2.morphologyEx(thresh2, cv2.MORPH_TOPHAT, kernel2)
    kernel3=np.ones((7,7),np.uint8)
    opening = cv2.morphologyEx(tophat, cv2.MORPH_OPEN, kernel3)
    return opening

In [7]:
for filename in os.listdir(input_folder_test):
    if filename.endswith('.jpg') or filename.endswith('.png'):
        image = cv2.imread(os.path.join(input_folder_test, filename))
        ma_image = extract_ma(image)
        output_path = os.path.join(output_folder_ma_test, filename)
        cv2.imwrite(output_path, ma_image)

#### Blood Vessels

In [8]:
def blood_vessel(image):
    ratio = min([1152 / image.shape[0], 1500 / image.shape[1]])
    resized_image = cv2.resize(image, (int(image.shape[1] * ratio), int(image.shape[0] * ratio)), interpolation=cv2.INTER_CUBIC)
    gray_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)

    b, g, r = cv2.split(resized_image)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced_green_channel = clahe.apply(g)
    
    img_medf = cv2.medianBlur(enhanced_green_channel, 131)
    img_sub = cv2.subtract(img_medf, enhanced_green_channel)
    img_subf = cv2.blur(img_sub, (7, 7))
    ret, img_darkf = cv2.threshold(img_subf, 12, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
    img_darkl = cv2.morphologyEx(img_darkf, cv2.MORPH_OPEN, kernel)

    img_medf1 = cv2.medianBlur(enhanced_green_channel, 131)
    img_sub1 = cv2.subtract(img_medf1, enhanced_green_channel)
    img_subf1 = cv2.blur(img_sub1, (7, 7))
    ret, img_darkf1 = cv2.threshold(img_subf1, 12, 255, cv2.THRESH_BINARY)
    img_darkl1 = cv2.morphologyEx(img_darkf1, cv2.MORPH_OPEN, kernel)

    img_both = cv2.bitwise_or(img_darkl, img_darkl1)

    result = cv2.resize(img_both, (enhanced_green_channel.shape[1], enhanced_green_channel.shape[0]), interpolation=cv2.INTER_CUBIC)
    
    return result

In [9]:
for filename in os.listdir(input_folder_test):
    if filename.endswith('.jpg') or filename.endswith('.png'):
        image = cv2.imread(os.path.join(input_folder_test, filename))
        bv_image = blood_vessel(image)
        output_path = os.path.join(output_folder_bv_test, filename)
        cv2.imwrite(output_path, bv_image)

#### Exudates

In [10]:
def exudate_extraction(image):
    image=cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
   
    total_x_position = 0
    total_white_pixels = 0
    total_y_position = 0
    sum_values=0
    v,k=image.shape
    for y in range(len(image)):
        for x in range(len(image)):
            sum_values+=image[y][x]
    avg=sum_values/(v*k)
    
    threshh = (2.3547*avg) + 10.292
    _, binary_image = cv2.threshold(image, threshh, 255, cv2.THRESH_BINARY) 

    for y in range(len(binary_image)):
        for x in range(len(binary_image)):
            if binary_image[y][x] == 255:
                total_x_position += x
                total_y_position += y
                total_white_pixels += 1
                
     
    if total_white_pixels == 0:
        average_x_position = 0
        average_y_position=0
    else:
        average_x_position = total_x_position / total_white_pixels
        average_y_position = total_y_position / total_white_pixels
    if average_y_position<1000:
        for j in range(binary_image.shape[0]):
            for i1 in range(binary_image.shape[1]):
                if j<(average_y_position+300):
                    binary_image[j,i1]=0
    if average_x_position>2200 or average_x_position==0:
        for j in range(binary_image.shape[0]):
            for i1 in range(binary_image.shape[1]):
                if i1>(average_x_position-240):
                    binary_image[j,i1]=0
    else:
        for j in range(binary_image.shape[0]):
            for i1 in range(binary_image.shape[1]):
                if i1<(average_x_position+240): 
                    binary_image[j,i1]=0

    return binary_image

In [None]:
for filename in os.listdir(input_folder_test):
    if filename.endswith('.jpg') or filename.endswith('.png'):
        image = cv2.imread(os.path.join(input_folder_test, filename))
        ex_image = exudate_extraction(image)
        output_path = os.path.join(output_folder_ex_test, filename)
        cv2.imwrite(output_path, ex_image)

### Feature Extraction

### Train

In [36]:
train_df=pd.read_csv("D:\DR_final\\train_df.csv")

In [37]:
train_df.head()

Unnamed: 0.1,Unnamed: 0,Image name,New Retinopathy Grade
0,0,IDRiD_001,1
1,1,IDRiD_002,1
2,2,IDRiD_003,1
3,3,IDRiD_004,1
4,4,IDRiD_005,1


In [38]:
del train_df['Unnamed: 0']

In [39]:
train_df.head()

Unnamed: 0,Image name,New Retinopathy Grade
0,IDRiD_001,1
1,IDRiD_002,1
2,IDRiD_003,1
3,IDRiD_004,1
4,IDRiD_005,1


In [40]:
train_df['New Retinopathy Grade'].value_counts()

1    279
0    134
Name: New Retinopathy Grade, dtype: int64

#### Feature:1 Area of Optic Disc

In [41]:
optic_disc_path_train="D:\DR_final\\Optic Disc"

In [42]:
def area_optic_disc(image):
    white_pixels = (image == 255).sum()
    return white_pixels

In [43]:
areas_dict = {}

for image_name in train_df["Image name"]:
    image_path = os.path.join(optic_disc_path_train, f"{image_name}.jpg")
    if os.path.exists(image_path):
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) 
        area = area_optic_disc(image)  
    else:
        area = 0  

    areas_dict[image_name] = area

train_df["Optic Disc Area"] = train_df["Image name"].map(areas_dict)

In [44]:
train_df

Unnamed: 0,Image name,New Retinopathy Grade,Optic Disc Area
0,IDRiD_001,1,22103
1,IDRiD_002,1,31765
2,IDRiD_003,1,13279
3,IDRiD_004,1,57810
4,IDRiD_005,1,7313
...,...,...,...
408,IDRiD_409,1,33153
409,IDRiD_410,1,25120
410,IDRiD_411,1,30642
411,IDRiD_412,1,15665


#### Feature:2  Ratio of Microaneurysm

In [45]:
MA_path_train="D:\DR_final\\MA"

In [46]:
def Ratio_MA(image):
    num_white_pixels = cv2.countNonZero(image)
    total_pixels = image.size
    ratio = num_white_pixels / total_pixels
    return ratio

In [47]:
ma_ratios = []
for image_name in train_df["Image name"]:
    image_path = os.path.join(MA_path_train, f"{image_name}.jpg")

    if os.path.exists(image_path):

        ma_image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)  
        ma_ratio = Ratio_MA(ma_image) 
    else:
        ma_ratio = 0  
    ma_ratios.append(ma_ratio)

train_df["MA Ratio"] = ma_ratios

In [48]:
train_df

Unnamed: 0,Image name,New Retinopathy Grade,Optic Disc Area,MA Ratio
0,IDRiD_001,1,22103,0.000339
1,IDRiD_002,1,31765,0.000180
2,IDRiD_003,1,13279,0.000010
3,IDRiD_004,1,57810,0.000252
4,IDRiD_005,1,7313,0.000572
...,...,...,...,...
408,IDRiD_409,1,33153,0.000488
409,IDRiD_410,1,25120,0.000413
410,IDRiD_411,1,30642,0.000348
411,IDRiD_412,1,15665,0.000501


#### Feature:3 Blood Vessel Length

In [49]:
Blood_vessels_path_train="D:\DR_final\Blood Vessels"

In [50]:
vessel_lengths = []
for image_name in train_df["Image name"]:
    image_path = os.path.join(Blood_vessels_path_train, f"{image_name}.jpg")

    if os.path.exists(image_path):
        vessel_image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        vessel_skeleton = skeletonize(vessel_image)
        length = np.sum(vessel_skeleton)
    else:
        length = 0  

    vessel_lengths.append(length)
train_df["Vessel Length"] = vessel_lengths

In [51]:
train_df

Unnamed: 0,Image name,New Retinopathy Grade,Optic Disc Area,MA Ratio,Vessel Length
0,IDRiD_001,1,22103,0.000339,62149
1,IDRiD_002,1,31765,0.000180,53187
2,IDRiD_003,1,13279,0.000010,30581
3,IDRiD_004,1,57810,0.000252,47614
4,IDRiD_005,1,7313,0.000572,45913
...,...,...,...,...,...
408,IDRiD_409,1,33153,0.000488,35944
409,IDRiD_410,1,25120,0.000413,47678
410,IDRiD_411,1,30642,0.000348,57830
411,IDRiD_412,1,15665,0.000501,55980


#### Feature:4 Blood Vessel Tortuosity 

In [52]:
vessel_tortuosities = []

for image_name in train_df["Image name"]:
    image_path = os.path.join(Blood_vessels_path_train, f"{image_name}.jpg")

    if os.path.exists(image_path):
        vessel_image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        vessel_skeleton = skeletonize(vessel_image)
        vessel_length = np.sum(vessel_skeleton)
        
        contours, _ = cv2.findContours(vessel_skeleton.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        perimeter = sum(cv2.arcLength(contour, True) for contour in contours)
        tortuosity = perimeter / vessel_length
    else:
        tortuosity = 0  

    vessel_tortuosities.append(tortuosity)


train_df["Vessel Tortuosity"] = vessel_tortuosities

In [53]:
train_df

Unnamed: 0,Image name,New Retinopathy Grade,Optic Disc Area,MA Ratio,Vessel Length,Vessel Tortuosity
0,IDRiD_001,1,22103,0.000339,62149,1.337445
1,IDRiD_002,1,31765,0.000180,53187,1.459258
2,IDRiD_003,1,13279,0.000010,30581,1.538633
3,IDRiD_004,1,57810,0.000252,47614,1.507505
4,IDRiD_005,1,7313,0.000572,45913,1.523906
...,...,...,...,...,...,...
408,IDRiD_409,1,33153,0.000488,35944,1.564850
409,IDRiD_410,1,25120,0.000413,47678,1.499443
410,IDRiD_411,1,30642,0.000348,57830,1.376905
411,IDRiD_412,1,15665,0.000501,55980,1.094860


#### Feature:5 Edge Density

In [54]:
image_directory_train="D:\DR_final\\a. Training Set"

In [55]:
edge_density_list = []

for image_name in train_df["Image name"]:
    image_path = os.path.join(image_directory_train, f"{image_name}.jpg")

    if os.path.exists(image_path):
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        edges = cv2.Canny(image, threshold1=30, threshold2=70) 
        edge_density = np.count_nonzero(edges) / (edges.shape[0] * edges.shape[1])
        edge_density_list.append(edge_density)
        

    else:
        edge_density_list.append(0)
        edge_orientation_list.append([])
        
train_df["Edge_Density"] = edge_density_list

In [56]:
train_df

Unnamed: 0,Image name,New Retinopathy Grade,Optic Disc Area,MA Ratio,Vessel Length,Vessel Tortuosity,Edge_Density
0,IDRiD_001,1,22103,0.000339,62149,1.337445,0.019490
1,IDRiD_002,1,31765,0.000180,53187,1.459258,0.018572
2,IDRiD_003,1,13279,0.000010,30581,1.538633,0.002163
3,IDRiD_004,1,57810,0.000252,47614,1.507505,0.021770
4,IDRiD_005,1,7313,0.000572,45913,1.523906,0.021906
...,...,...,...,...,...,...,...
408,IDRiD_409,1,33153,0.000488,35944,1.564850,0.014212
409,IDRiD_410,1,25120,0.000413,47678,1.499443,0.015686
410,IDRiD_411,1,30642,0.000348,57830,1.376905,0.018373
411,IDRiD_412,1,15665,0.000501,55980,1.094860,0.019889


#### Feature 6:Mean Intensity

#### Feature 7: Standard Deviation

In [57]:
mean_intensity_list = []
std_deviation_list = []

for image_name in train_df["Image name"]:
    image_path = os.path.join(image_directory_train, f"{image_name}.jpg")

    if os.path.exists(image_path):
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

        mean_intensity = np.mean(image)
        std_deviation = np.std(image)

        mean_intensity_list.append(mean_intensity)
        std_deviation_list.append(std_deviation)
    else:
        mean_intensity_list.append(0)
        std_deviation_list.append(0)

train_df["Mean_Intensity"] = mean_intensity_list
train_df["Std_Deviation"] = std_deviation_list

In [None]:
train_df

#### Feature 8: Exudate density

In [None]:
exudates_path_train="D:\\DR_final\\Exudates"

In [None]:
exudate_densities = []

for image_name in train_df["Image name"]:
    image_path = os.path.join(exudates_path_train, f"{image_name}.jpg")

    if os.path.exists(image_path):
        exudates_image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        exudate_count = cv2.countNonZero(exudates_image)
        total_pixels = exudates_image.size
        exudate_density = exudate_count / total_pixels
    else:
        exudate_density = 0
    exudate_densities.append(exudate_density)

train_df["Exudate Density"] = exudate_densities

### Converting dataframe to csv

In [None]:
csv_file_path = "D:\DR_final\Feature Extraction\\train_df_final.csv"
train_df.to_csv(csv_file_path, index=False)

### Test

In [4]:
test_df=pd.read_csv("D:\\DR_final\\test_df.csv")

In [5]:
test_df.head()

Unnamed: 0.1,Unnamed: 0,Image name,New Retinopathy Grade
0,0,IDRiD_001,1
1,1,IDRiD_002,1
2,2,IDRiD_003,1
3,3,IDRiD_004,1
4,4,IDRiD_005,1


In [6]:
del test_df['Unnamed: 0']

In [7]:
test_df.head()

Unnamed: 0,Image name,New Retinopathy Grade
0,IDRiD_001,1
1,IDRiD_002,1
2,IDRiD_003,1
3,IDRiD_004,1
4,IDRiD_005,1


In [8]:
test_df['New Retinopathy Grade'].value_counts()

1    69
0    34
Name: New Retinopathy Grade, dtype: int64

#### Feature 1: Optic Disc Area

In [9]:
optic_disc_path_test="D:\DR_final\\Optic_disc_test"

In [10]:
def area_optic_disc(image):
    white_pixels = (image == 255).sum()
    return white_pixels

In [11]:
areas_dict = {}

for image_name in test_df["Image name"]:
    image_path = os.path.join(optic_disc_path_test, f"{image_name}.jpg")

    if os.path.exists(image_path):
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) 
        area = area_optic_disc(image)  
    else:
        area = 0 
    areas_dict[image_name] = area

test_df["Optic Disc Area"] = test_df["Image name"].map(areas_dict)

In [12]:
test_df

Unnamed: 0,Image name,New Retinopathy Grade,Optic Disc Area
0,IDRiD_001,1,0
1,IDRiD_002,1,4534
2,IDRiD_003,1,0
3,IDRiD_004,1,4542
4,IDRiD_005,1,0
...,...,...,...
98,IDRiD_099,0,34341
99,IDRiD_100,1,24742
100,IDRiD_101,1,25725
101,IDRiD_102,1,60382


#### Feature 2: Ratio of Microaneurysm

In [13]:
MA_path_test="D:\DR_final\\Optic Disc"

In [14]:
def Ratio_MA(image):
    num_white_pixels = cv2.countNonZero(image)
    total_pixels = image.size
    ratio = num_white_pixels / total_pixels
    return ratio

In [16]:
ma_ratios = []

for image_name in test_df["Image name"]:
    image_path = os.path.join(MA_path_test, f"{image_name}.jpg")

    if os.path.exists(image_path):

        ma_image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)  
        ma_ratio = Ratio_MA(ma_image) 
    else:
        ma_ratio = 0  
    ma_ratios.append(ma_ratio)

test_df["MA Ratio"] = ma_ratios

In [17]:
test_df

Unnamed: 0,Image name,New Retinopathy Grade,Optic Disc Area,MA Ratio
0,IDRiD_001,1,0,0.016201
1,IDRiD_002,1,4534,0.022860
2,IDRiD_003,1,0,0.009889
3,IDRiD_004,1,4542,0.040811
4,IDRiD_005,1,0,0.005705
...,...,...,...,...
98,IDRiD_099,0,34341,0.018354
99,IDRiD_100,1,24742,0.003042
100,IDRiD_101,1,25725,0.023414
101,IDRiD_102,1,60382,0.018744


#### Feature 3: Blood Vessel Length

In [18]:
Blood_vessels_path_test="D:\\DR_final\\Blood Vessels test"

In [19]:
vessel_lengths = []

for image_name in test_df["Image name"]:
    image_path = os.path.join(Blood_vessels_path_test, f"{image_name}.jpg")

    if os.path.exists(image_path):
        vessel_image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        vessel_skeleton = skeletonize(vessel_image)
        length = np.sum(vessel_skeleton)
    else:
        length = 0  

    vessel_lengths.append(length)
test_df["Vessel Length"] = vessel_lengths

In [20]:
test_df

Unnamed: 0,Image name,New Retinopathy Grade,Optic Disc Area,MA Ratio,Vessel Length
0,IDRiD_001,1,0,0.016201,16616
1,IDRiD_002,1,4534,0.022860,26817
2,IDRiD_003,1,0,0.009889,30009
3,IDRiD_004,1,4542,0.040811,71288
4,IDRiD_005,1,0,0.005705,19307
...,...,...,...,...,...
98,IDRiD_099,0,34341,0.018354,56677
99,IDRiD_100,1,24742,0.003042,44123
100,IDRiD_101,1,25725,0.023414,42640
101,IDRiD_102,1,60382,0.018744,52402


#### Feature 4:Vessel Tortuosity 

In [21]:
vessel_tortuosities = []

for image_name in test_df["Image name"]:
    image_path = os.path.join(Blood_vessels_path_test, f"{image_name}.jpg")

    if os.path.exists(image_path):
        vessel_image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        vessel_skeleton = skeletonize(vessel_image)
        vessel_length = np.sum(vessel_skeleton)
        contours, _ = cv2.findContours(vessel_skeleton.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        perimeter = sum(cv2.arcLength(contour, True) for contour in contours)
        tortuosity = perimeter / vessel_length
    else:
        tortuosity = 0  

    vessel_tortuosities.append(tortuosity)

test_df["Vessel Tortuosity"] = vessel_tortuosities

In [22]:
test_df

Unnamed: 0,Image name,New Retinopathy Grade,Optic Disc Area,MA Ratio,Vessel Length,Vessel Tortuosity
0,IDRiD_001,1,0,0.016201,16616,1.553983
1,IDRiD_002,1,4534,0.022860,26817,1.542775
2,IDRiD_003,1,0,0.009889,30009,1.558268
3,IDRiD_004,1,4542,0.040811,71288,1.418744
4,IDRiD_005,1,0,0.005705,19307,1.556777
...,...,...,...,...,...,...
98,IDRiD_099,0,34341,0.018354,56677,1.377675
99,IDRiD_100,1,24742,0.003042,44123,1.437596
100,IDRiD_101,1,25725,0.023414,42640,1.324905
101,IDRiD_102,1,60382,0.018744,52402,1.459592


#### Feature 5: Edge Density

In [23]:
image_directory_test="D:\\DR_final\\b. Testing Set"
edge_density_list = []

for image_name in test_df["Image name"]:
    image_path = os.path.join(image_directory_test, f"{image_name}.jpg")

    if os.path.exists(image_path):
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        edges = cv2.Canny(image, threshold1=30, threshold2=70) 
        edge_density = np.count_nonzero(edges) / (edges.shape[0] * edges.shape[1])
        edge_density_list.append(edge_density)
        

    else:
        edge_density_list.append(0)
        edge_orientation_list.append([])
        
test_df["Edge_Density"] = edge_density_list

In [24]:
test_df

Unnamed: 0,Image name,New Retinopathy Grade,Optic Disc Area,MA Ratio,Vessel Length,Vessel Tortuosity,Edge_Density
0,IDRiD_001,1,0,0.016201,16616,1.553983,0.000809
1,IDRiD_002,1,4534,0.022860,26817,1.542775,0.004242
2,IDRiD_003,1,0,0.009889,30009,1.558268,0.007770
3,IDRiD_004,1,4542,0.040811,71288,1.418744,0.036408
4,IDRiD_005,1,0,0.005705,19307,1.556777,0.013398
...,...,...,...,...,...,...,...
98,IDRiD_099,0,34341,0.018354,56677,1.377675,0.021375
99,IDRiD_100,1,24742,0.003042,44123,1.437596,0.000779
100,IDRiD_101,1,25725,0.023414,42640,1.324905,0.000780
101,IDRiD_102,1,60382,0.018744,52402,1.459592,0.000713


#### Feature 6: Mean Intensity

#### Feature 7: Standard Deviation

In [25]:
mean_intensity_list = []
std_deviation_list = []

for image_name in test_df["Image name"]:
    image_path = os.path.join(image_directory_test, f"{image_name}.jpg")

    if os.path.exists(image_path):
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        mean_intensity = np.mean(image)
        std_deviation = np.std(image)

        mean_intensity_list.append(mean_intensity)
        std_deviation_list.append(std_deviation)
    else:
        mean_intensity_list.append(0)
        std_deviation_list.append(0)

test_df["Mean_Intensity"] = mean_intensity_list
test_df["Std_Deviation"] = std_deviation_list

In [26]:
test_df

Unnamed: 0,Image name,New Retinopathy Grade,Optic Disc Area,MA Ratio,Vessel Length,Vessel Tortuosity,Edge_Density,Mean_Intensity,Std_Deviation
0,IDRiD_001,1,0,0.016201,16616,1.553983,0.000809,27.543258,19.713929
1,IDRiD_002,1,4534,0.022860,26817,1.542775,0.004242,41.831461,31.072523
2,IDRiD_003,1,0,0.009889,30009,1.558268,0.007770,52.665440,36.961635
3,IDRiD_004,1,4542,0.040811,71288,1.418744,0.036408,65.251439,47.482030
4,IDRiD_005,1,0,0.005705,19307,1.556777,0.013398,42.553625,33.469694
...,...,...,...,...,...,...,...,...,...
98,IDRiD_099,0,34341,0.018354,56677,1.377675,0.021375,75.658445,51.356860
99,IDRiD_100,1,24742,0.003042,44123,1.437596,0.000779,57.594719,39.012111
100,IDRiD_101,1,25725,0.023414,42640,1.324905,0.000780,59.010476,40.656457
101,IDRiD_102,1,60382,0.018744,52402,1.459592,0.000713,77.110763,52.669210


#### Feature 8: Exudates Density

In [27]:
exudates_path_test="D:\\DR_final\\Exudates test"

In [29]:
exudate_densities = []

for image_name in test_df["Image name"]:
    image_path = os.path.join(exudates_path_test, f"{image_name}.jpg")

    if os.path.exists(image_path):
        exudates_image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        exudate_count = cv2.countNonZero(exudates_image)
        total_pixels = exudates_image.size
        exudate_density = exudate_count / total_pixels
    else:
        exudate_density = 0
    exudate_densities.append(exudate_density)

test_df["Exudate Density"] = exudate_densities

In [30]:
test_df

Unnamed: 0,Image name,New Retinopathy Grade,Optic Disc Area,MA Ratio,Vessel Length,Vessel Tortuosity,Edge_Density,Mean_Intensity,Std_Deviation,Exudate Density
0,IDRiD_001,1,0,0.016201,16616,1.553983,0.000809,27.543258,19.713929,0.000058
1,IDRiD_002,1,4534,0.022860,26817,1.542775,0.004242,41.831461,31.072523,0.004857
2,IDRiD_003,1,0,0.009889,30009,1.558268,0.007770,52.665440,36.961635,0.003784
3,IDRiD_004,1,4542,0.040811,71288,1.418744,0.036408,65.251439,47.482030,0.008269
4,IDRiD_005,1,0,0.005705,19307,1.556777,0.013398,42.553625,33.469694,0.053730
...,...,...,...,...,...,...,...,...,...,...
98,IDRiD_099,0,34341,0.018354,56677,1.377675,0.021375,75.658445,51.356860,0.000222
99,IDRiD_100,1,24742,0.003042,44123,1.437596,0.000779,57.594719,39.012111,0.000078
100,IDRiD_101,1,25725,0.023414,42640,1.324905,0.000780,59.010476,40.656457,0.005478
101,IDRiD_102,1,60382,0.018744,52402,1.459592,0.000713,77.110763,52.669210,0.013541


### Converting Data Frame to Csv

In [31]:
csv_file_path = "D:\DR_final\Feature Extraction\\test_df_final.csv"
test_df.to_csv(csv_file_path, index=False)

### Classification Model

In [4]:
train_final_df=pd.read_csv("D:\DR_final\Feature Extraction\\train_df_final.csv")
test_final_df=pd.read_csv("D:\DR_final\Feature Extraction\\test_df_final.csv")

In [5]:
train_final_df.head()

Unnamed: 0,Image name,New Retinopathy Grade,Optic Disc Area,MA Ratio,Vessel Length,Vessel Tortuosity,Edge_Density,Mean_Intensity,Std_Deviation,Exudate Density
0,IDRiD_001,1,22103,0.000339,62149,1.337445,0.01949,71.999626,48.732911,0.003891
1,IDRiD_002,1,31765,0.00018,53187,1.459258,0.018572,70.925728,47.566503,0.002264
2,IDRiD_003,1,13279,1e-05,30581,1.538633,0.002163,53.298688,37.172219,0.000445
3,IDRiD_004,1,57810,0.000252,47614,1.507505,0.02177,63.372044,46.031325,0.005635
4,IDRiD_005,1,7313,0.000572,45913,1.523906,0.021906,72.531829,48.698143,0.0


In [6]:
X_train = train_final_df.drop(columns=["Image name", "New Retinopathy Grade","Optic Disc Area","Edge_Density","Std_Deviation","Exudate Density"])
y_train = train_final_df["New Retinopathy Grade"]
X_test = test_final_df.drop(columns=["Image name", "New Retinopathy Grade","Optic Disc Area","Edge_Density","Std_Deviation","Exudate Density"])
y_test = test_final_df["New Retinopathy Grade"]

In [7]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
def model_evaluation(y_test, y_pred, model_name):
    print("Classification report for", model_name, "\n")
    print(classification_report(y_test, y_pred))

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

#### Logistic Regression

In [9]:
model_name="LOGISTIC REGRESSION"

In [10]:
lr=LogisticRegression()
lr.fit(X_train, y_train)

In [11]:
y_pred_lr=lr.predict(X_test)

In [12]:
f1_score_lr=f1_score(y_test, y_pred_lr, average="weighted")
print(f1_score_lr)

0.30202467778660513


In [13]:
model_evaluation(y_test,y_pred_lr,model_name)

Classification report for LOGISTIC REGRESSION 

              precision    recall  f1-score   support

           0       0.34      0.94      0.50        34
           1       0.80      0.12      0.20        69

    accuracy                           0.39       103
   macro avg       0.57      0.53      0.35       103
weighted avg       0.65      0.39      0.30       103

Accuracy: 0.3883495145631068


#### Random Forest

In [14]:
model_name="RANDOM FOREST"

In [15]:
rf=RandomForestClassifier()
rf.fit(X_train, y_train)

In [16]:
y_pred_rf=rf.predict(X_test)

In [17]:
f1_score_rf=f1_score(y_test, y_pred_rf, average="weighted")
print(f1_score_rf)

0.7346034671793813


In [18]:
model_evaluation(y_test,y_pred_rf,model_name)

Classification report for RANDOM FOREST 

              precision    recall  f1-score   support

           0       0.61      0.56      0.58        34
           1       0.79      0.83      0.81        69

    accuracy                           0.74       103
   macro avg       0.70      0.69      0.70       103
weighted avg       0.73      0.74      0.73       103

Accuracy: 0.7378640776699029


#### Gradient Boosting

In [19]:
model_name="GRADIENT BOOSTING"

In [20]:
gb = GradientBoostingClassifier()
gb.fit(X_train, y_train)

In [21]:
y_pred_gb = gb.predict(X_test)

In [22]:
f1_score_gb = f1_score(y_test, y_pred_gb, average="weighted")
print(f1_score_gb)

0.5010481023830539


In [23]:
model_evaluation(y_test,y_pred_gb,model_name)

Classification report for GRADIENT BOOSTING 

              precision    recall  f1-score   support

           0       0.39      0.88      0.55        34
           1       0.85      0.33      0.48        69

    accuracy                           0.51       103
   macro avg       0.62      0.61      0.51       103
weighted avg       0.70      0.51      0.50       103

Accuracy: 0.5145631067961165


#### SVC

In [24]:
model_name="SVC"

In [25]:
svm = SVC()
svm.fit(X_train, y_train)

In [26]:
y_pred_svm = svm.predict(X_test)

In [27]:
f1_score_svm = f1_score(y_test, y_pred_svm, average="weighted")
print(f1_score_svm)

0.5594860938754654


In [28]:
model_evaluation(y_test,y_pred_svm,model_name)

Classification report for SVC 

              precision    recall  f1-score   support

           0       1.00      0.03      0.06        34
           1       0.68      1.00      0.81        69

    accuracy                           0.68       103
   macro avg       0.84      0.51      0.43       103
weighted avg       0.78      0.68      0.56       103

Accuracy: 0.6796116504854369


#### KNNClassifier

In [29]:
model_name="KNNClassifier"

In [30]:
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)

In [31]:
y_pred_knn = knn.predict(X_test)

In [32]:
f1_score_knn = f1_score(y_test, y_pred_knn, average="weighted")
print(f1_score_knn)

0.30677625598680036


In [33]:
model_evaluation(y_test,y_pred_knn,model_name)

Classification report for KNNClassifier 

              precision    recall  f1-score   support

           0       0.33      0.88      0.48        34
           1       0.69      0.13      0.22        69

    accuracy                           0.38       103
   macro avg       0.51      0.51      0.35       103
weighted avg       0.57      0.38      0.31       103

Accuracy: 0.3786407766990291


#### Accuracy Comparison

In [34]:
accuracy_lr=round(accuracy_score(y_test,y_pred_lr),2)
accuracy_rf=round(accuracy_score(y_test,y_pred_rf),2)
accuracy_gb=round(accuracy_score(y_test,y_pred_gb),2)
accuracy_svm=round(accuracy_score(y_test,y_pred_svm),2)
accuracy_KNN=round(accuracy_score(y_test,y_pred_knn),2)

In [35]:
accuracy_dict = {
    'Model': ['Logistic Regression', 'Random Forest', 'Gradient Boosting', 'SVM', 'KNN'],
    'Accuracy': [accuracy_lr, accuracy_rf, accuracy_gb, accuracy_svm, accuracy_KNN]
}
accuracy_score_model = pd.DataFrame(accuracy_dict)

In [36]:
accuracy_score_model

Unnamed: 0,Model,Accuracy
0,Logistic Regression,0.39
1,Random Forest,0.74
2,Gradient Boosting,0.51
3,SVM,0.68
4,KNN,0.38


### Saving the trained Model

In [37]:
! pip install  joblib



In [41]:
file_path = "D:\\DR_final\\Diabetic Retinopathy Classification.joblib"

In [42]:
from joblib import dump, load
dump(rf, file_path)


['D:\\DR_final\\Diabetic Retinopathy Classification.joblib']

### Mean Absolute Error for Segmentation

In [None]:
import os
import cv2
import numpy as np


image_folder = "D:\\DR detection\\a. Training Set"

image_files = os.listdir(image_folder)

total_mae_od = 0.0
total_mae_ma = 0.0
total_mae_ex = 0.0
total_images = 0

i = 1
j = 0


for image_file in image_files:
    if image_file.endswith((".jpg", ".png")):
        image_path = os.path.join(image_folder, image_file)
        original_image = cv2.imread(image_path)

        od_image = optic_disc(original_image)
        ma_image = extract_ma(original_image)
        ex_image = exudate_extraction(original_image)

        ground_truth_folder_ma ="D:\\DR detection\\1. Microaneurysms"
        ground_truth_folder_exudate = "D:\\DR detection\\3. Hard Exudates"
        ground_truth_folder_optic_disc = "D:\\DR detection\\5. Optic Disc"
        ground_truths_ma = [f for f in os.listdir(ground_truth_folder_ma)]
        ground_truths_optic_disc = [f for f in os.listdir(ground_truth_folder_optic_disc)]
        ground_truths_exudate = [f for f in os.listdir(ground_truth_folder_exudate)]

        ma_path = os.path.join(ground_truth_folder_ma, ground_truths_ma[j])
        ground_truth_ma = cv2.imread(ma_path, cv2.IMREAD_GRAYSCALE)
        od_path = os.path.join(ground_truth_folder_optic_disc, ground_truths_optic_disc[j])
        ground_truth_optic_disc = cv2.imread(od_path, cv2.IMREAD_GRAYSCALE)
        ex_path = os.path.join(ground_truth_folder_exudate, ground_truths_exudate[j])
        ground_truth_exudate = cv2.imread(ex_path, cv2.IMREAD_GRAYSCALE)
        j += 1

        mae_od = np.mean(np.abs(ground_truth_optic_disc - od_image))
        mae_ex = np.mean(np.abs(ground_truth_exudate - ex_image))
        mae_ma = np.mean(np.abs(ground_truth_ma - ma_image))

        total_mae_od += mae_od
        total_mae_ma += mae_ma
        total_mae_ex += mae_ex

        total_images += 1

average_mae_od = total_mae_od / total_images
average_mae_ma = total_mae_ma / total_images
average_mae_ex = total_mae_ex / total_images

print("Average MAE for Optic Disc Segmentation:", average_mae_od)
print("Average MAE for MA Segmentation:", average_mae_ma)
print("Average MAE for Exudate Segmentation:", average_mae_ex)