In [9]:
import cv2
import numpy as np
import pandas as pd
from skimage.feature.texture import greycomatrix
from skimage.feature.texture import greycoprops
from skimage.measure import shannon_entropy
import os
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm

In [7]:
def get_feature(img):
    clahe = cv2.createCLAHE(clipLimit = 5)
    img_clahe = clahe.apply(img) + 30
    
    img_median = cv2.medianBlur(img_clahe, 5)
    
    img_graymatrix = greycomatrix(img_median, [1], [0, np.pi/4, np.pi/2, 3*np.pi/4])
#     print("img_graymatrix shape: ",img_graymatrix.shape)
    img_homogeneity = greycoprops(img_graymatrix, 'homogeneity')
#     print("img_homogeneity shape: ", img_homogeneity.shape)
    img_entropy = shannon_entropy(img_graymatrix)
    img_entropy = np.asarray([img_entropy])
#     print("img_entropy: ", img_entropy)
#     print("img_entropy shape: ", img_entropy.shape)
    img_correlation = greycoprops(img_graymatrix, 'correlation')
#     print("img_correlation shape: ",img_correlation.shape)

    
    img_correlation_flattened = img_correlation.flatten()
    img_homogeneity_flattened = img_homogeneity.flatten()

    features = np.concatenate([img_correlation_flattened, img_homogeneity_flattened, img_entropy])
#     print("final_feature shape: ",features.shape)
    
    return features
        

In [12]:
def feature_extraction(path_to_folder, class_label):
    data_list=[]
#     count=1
    for file_name in tqdm(os.listdir(path_to_folder)):
#         if(count>1):
#             break
        path_to_img = os.path.join(path_to_folder,file_name)
        img = cv2.imread(path_to_img)

        if np.shape(img) == ():
            continue

#         img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) #changing to grayscale.
        img_g = img[:,:,1] # Green channel

        F = get_feature(img_g)

        final_feature=list(F)
#         print(len(final_feature))
        final_feature.insert(0,file_name)
        final_feature.insert(1,class_label)

        data_list.append(final_feature)
#         count+=1
        
    return(data_list)

In [13]:
normal_path = 'PATH_TO_NORMAL_CONDITION_EYE_FOLDER'
proliferative_dr_path = 'PATH_TO_PROLIFEREATIVE_DR_EYE_FOLDER'

data_list1 = feature_extraction(normal_path, 0)
data_list2 = feature_extraction(proliferative_dr_path, 1)

df = pd.DataFrame(data_list1)
df = df.append(pd.DataFrame(data_list2), ignore_index=True)
# #  --------------------------------------------------------------------------------------


df.rename(columns = {0: "image_names", 1: "label"}, inplace = True)

100%|██████████| 360/360 [00:08<00:00, 41.15it/s]
100%|██████████| 360/360 [00:08<00:00, 43.54it/s]


In [14]:
df.head()

Unnamed: 0,image_names,label,2,3,4,5,6,7,8,9,10
0,20060410_44934_0200_PP.png,0,0.939992,0.90625,0.939216,0.91209,0.454434,0.368172,0.452075,0.37504,1.188855
1,20051214_56688_0100_PP.png,0,0.946969,0.920531,0.946564,0.921128,0.476562,0.387336,0.47526,0.395696,1.086486
2,20060530_53062_0100_PP.png,0,0.947533,0.921273,0.946329,0.920982,0.510741,0.423567,0.509151,0.42877,0.999567
3,20060529_57351_0100_PP.png,0,0.937723,0.905454,0.936522,0.904627,0.443276,0.346297,0.432108,0.351767,1.215094
4,20060412_58819_0200_PP.png,0,0.946365,0.921889,0.946932,0.919335,0.469154,0.381953,0.456682,0.384565,1.140948


In [15]:
df.shape

(720, 11)

In [16]:
df.to_csv('Normal_VS_dr_features.csv', index=False)

### Classification

In [None]:
# df=pd.read_csv('features.csv')

In [17]:
array=df.values
x_feature=array[:,2:]
y_label=array[:,1].astype('int')
print(x_feature.shape)
print(y_label.shape)

(720, 9)
(720,)


In [18]:
X_train,X_test,Y_train,Y_test=train_test_split(x_feature,y_label,test_size=0.10,random_state=7)

In [19]:
# Normalise the data after splitting to avoid information leak between train and test set.

scaler_norm = MinMaxScaler()

X_train = scaler_norm.fit_transform(X_train)
X_test = scaler_norm.fit_transform(X_test)

#### SVM classifier

In [20]:
# Random check
model_SVC=SVC(kernel='linear',C=100,gamma=0.001)

kfold=KFold(n_splits=10, shuffle=True)
cv_results=cross_val_score(model_SVC,X_train,Y_train,cv=kfold,scoring='accuracy')
msg="%s %f (%f)" % ('Training Accuracy: ',cv_results.mean(),cv_results.std())
print(msg)

Training Accuracy:  0.955288 (0.025210)


In [21]:
# SVM hypertuning using GridSeachCV
model_SVC=SVC()

kfold=KFold(n_splits=10)
param_grid = {'C': [1, 10, 100, 500, 1000], 
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf']} 

grid=GridSearchCV(estimator=model_SVC,param_grid=param_grid,scoring='accuracy',cv=kfold,verbose=3)
grid_result=grid.fit(X_train,Y_train)

print("Best: %f using %s" % (grid_result.best_score_,grid_result.best_params_))

Fitting 10 folds for each of 25 candidates, totalling 250 fits
[CV] C=1, gamma=1, kernel=rbf ........................................
[CV] ............ C=1, gamma=1, kernel=rbf, score=0.954, total=   0.0s
[CV] C=1, gamma=1, kernel=rbf ........................................
[CV] ............ C=1, gamma=1, kernel=rbf, score=0.938, total=   0.0s
[CV] C=1, gamma=1, kernel=rbf ........................................
[CV] ............ C=1, gamma=1, kernel=rbf, score=0.938, total=   0.0s
[CV] C=1, gamma=1, kernel=rbf ........................................
[CV] ............ C=1, gamma=1, kernel=rbf, score=0.954, total=   0.0s
[CV] C=1, gamma=1, kernel=rbf ........................................
[CV] ............ C=1, gamma=1, kernel=rbf, score=0.969, total=   0.0s
[CV] C=1, gamma=1, kernel=rbf ........................................
[CV] ............ C=1, gamma=1, kernel=rbf, score=0.954, total=   0.0s
[CV] C=1, gamma=1, kernel=rbf ........................................
[CV] .........

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


[CV] ......... C=1, gamma=0.01, kernel=rbf, score=0.938, total=   0.0s
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] ......... C=1, gamma=0.01, kernel=rbf, score=1.000, total=   0.0s
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] ......... C=1, gamma=0.01, kernel=rbf, score=0.938, total=   0.0s
[CV] C=1, gamma=0.001, kernel=rbf ....................................
[CV] ........ C=1, gamma=0.001, kernel=rbf, score=0.892, total=   0.0s
[CV] C=1, gamma=0.001, kernel=rbf ....................................
[CV] ........ C=1, gamma=0.001, kernel=rbf, score=0.846, total=   0.0s
[CV] C=1, gamma=0.001, kernel=rbf ....................................
[CV] ........ C=1, gamma=0.001, kernel=rbf, score=0.754, total=   0.0s
[CV] C=1, gamma=0.001, kernel=rbf ....................................
[CV] ........ C=1, gamma=0.001, kernel=rbf, score=0.923, total=   0.0s
[CV] C=1, gamma=0.001, kernel=rbf ....................................
[CV] .

[CV] ....... C=10, gamma=0.001, kernel=rbf, score=0.954, total=   0.0s
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV] ....... C=10, gamma=0.001, kernel=rbf, score=0.938, total=   0.0s
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV] ....... C=10, gamma=0.001, kernel=rbf, score=0.938, total=   0.0s
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV] ....... C=10, gamma=0.001, kernel=rbf, score=1.000, total=   0.0s
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV] ....... C=10, gamma=0.001, kernel=rbf, score=0.938, total=   0.0s
[CV] C=10, gamma=0.0001, kernel=rbf ..................................
[CV] ...... C=10, gamma=0.0001, kernel=rbf, score=0.892, total=   0.0s
[CV] C=10, gamma=0.0001, kernel=rbf ..................................
[CV] ...... C=10, gamma=0.0001, kernel=rbf, score=0.846, total=   0.0s
[CV] C=10, gamma=0.0001, kernel=rbf ..................................
[CV] .

[CV] .......... C=500, gamma=1, kernel=rbf, score=0.954, total=   0.0s
[CV] C=500, gamma=1, kernel=rbf ......................................
[CV] .......... C=500, gamma=1, kernel=rbf, score=0.923, total=   0.0s
[CV] C=500, gamma=1, kernel=rbf ......................................
[CV] .......... C=500, gamma=1, kernel=rbf, score=0.938, total=   0.0s
[CV] C=500, gamma=1, kernel=rbf ......................................
[CV] .......... C=500, gamma=1, kernel=rbf, score=0.954, total=   0.0s
[CV] C=500, gamma=1, kernel=rbf ......................................
[CV] .......... C=500, gamma=1, kernel=rbf, score=0.969, total=   0.0s
[CV] C=500, gamma=1, kernel=rbf ......................................
[CV] .......... C=500, gamma=1, kernel=rbf, score=0.938, total=   0.0s
[CV] C=500, gamma=1, kernel=rbf ......................................
[CV] .......... C=500, gamma=1, kernel=rbf, score=0.923, total=   0.0s
[CV] C=500, gamma=1, kernel=rbf ......................................
[CV] .

[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.938, total=   0.0s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.938, total=   0.0s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.954, total=   0.0s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.969, total=   0.0s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.954, total=   0.0s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.938, total=   0.0s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.969, total=   0.0s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] .

[Parallel(n_jobs=1)]: Done 250 out of 250 | elapsed:    2.4s finished


In [22]:
model_SVC = SVC(C=10,gamma=1, kernel='rbf')
model_SVC.fit(X_train,Y_train) 

predictions=model_SVC.predict(X_test)

print(accuracy_score(Y_test,predictions))
print(confusion_matrix(Y_test,predictions))
print(classification_report(Y_test,predictions))

0.9861111111111112
[[32  0]
 [ 1 39]]
              precision    recall  f1-score   support

           0       0.97      1.00      0.98        32
           1       1.00      0.97      0.99        40

    accuracy                           0.99        72
   macro avg       0.98      0.99      0.99        72
weighted avg       0.99      0.99      0.99        72



#### Majority Voting Ensemble

In [24]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [27]:
models = [('LR', LogisticRegression(solver ='lbfgs', max_iter = 500)),
         ('SVC', SVC(C=10, gamma =1, kernel='rbf', probability=True)),
         ('DTC', DecisionTreeClassifier())]

In [28]:
ensemble = VotingClassifier(estimators = models, voting ='soft')
ensemble.fit(X_train, Y_train)
predictions = ensemble.predict(X_test)


print(accuracy_score(Y_test,predictions))
print(confusion_matrix(Y_test,predictions))
print(classification_report(Y_test,predictions))

0.9722222222222222
[[32  0]
 [ 2 38]]
              precision    recall  f1-score   support

           0       0.94      1.00      0.97        32
           1       1.00      0.95      0.97        40

    accuracy                           0.97        72
   macro avg       0.97      0.97      0.97        72
weighted avg       0.97      0.97      0.97        72



### Conclusion
In this implementation I used the dataset available at - https://www.kaggle.com/mohammadasimbluemoon/diabeticretinopathy-messidor-eyepac-preprocessed
Please cite these papers for the dataset usage:
1. https://jamanetwork.com/journals/jama/fullarticle/2588763
2. http://www.ias-iss.org/ojs/IAS/article/view/1155
3. https://doi.org/10.1001/jamaophthalmol.2013.1743

Diabetic Retinopathy is an eye-condition developed in patients with diabetics that gradually turn into blindness. This is a code implementation of the paper - **"Diabetic Retinopathy Detection using Texture Features and Ensemble Learning"** (Cite from here - https://doi.org/10.1109/TENSYMP50017.2020.9230600) 

The dataset I used was a little different from the original mentioned one. I have only taken 360 photos of the nomral eye and 360 images of proliferative DR condition. Thus, a total of 720 images to perform binary classification. I tried to make feature extraction method as similar as possible with the information given in the paper. The results are however better due to the varying components such as dataset, feature vector length, preprocessing, etc.
