# Import Libraries

In [1]:
import os
import re
from pathlib import Path

import cv2
import numpy as np
import pandas as pd

try:
    # scikit-image >= 0.19 spells these "gray..."; alias them to the names used in this notebook.
    from skimage.feature import graycomatrix as greycomatrix
    from skimage.feature import graycoprops as greycoprops
except ImportError:  # older scikit-image releases only ship the "grey..." spelling
    from skimage.feature import greycomatrix, greycoprops

In [2]:
# Lift pandas' display limits so frames are rendered without truncating
# rows, columns or line width.
for display_option in ('display.max_rows', 'display.max_columns', 'display.width'):
    pd.set_option(display_option, None)

In [3]:
# Gray Level Co-occurrence Matrix (GLCM)
# -------------------- Utility function ------------------------
def normalize_label(str_):
    """Build a compact label from the first two ``_``-separated tokens.

    Spaces and parentheses are stripped from ``str_`` first; the remaining
    text is split on underscores and the first two pieces are concatenated
    (fewer if the text contains fewer pieces).
    """
    cleaned = "".join(ch for ch in str_ if ch not in " ()")
    tokens = cleaned.split("_")
    return "".join(tokens[:2])

def normalize_desc(folder, sub_folder):
    """Return a ``"<folder> - <sub_folder>"`` description without digits or dots.

    The two names are joined with ``" - "``, every run of digits and every
    ``.`` is removed, and surrounding whitespace is trimmed.
    """
    combined = "{} - {}".format(folder, sub_folder)
    without_digits = re.sub(r'\d+', '', combined)
    return without_digits.replace(".", "").strip()

def print_progress(val, val_len, folder, sub_folder, filename, bar_size=10):
    """Print a one-line text progress bar for the file currently being loaded.

    Parameters
    ----------
    val : int
        Zero-based index of the current file.
    val_len : int
        Index of the last file (number of files minus one). May be ``0``
        when a sub-folder holds a single file.
    folder, sub_folder, filename : str
        Location of the current file; echoed after the bar.
    bar_size : int, optional
        Total width of the bar in characters.

    Notes
    -----
    For ``val == 0`` only a newline is printed, which starts a fresh output
    line for the sub-folder. Every other call ends with ``"\\r"`` so the next
    call overwrites the same line.
    """
    if val == 0:
        # Return before any division: val_len is 0 for a single-file folder.
        print("", end = "\n")
        return

    if val_len > 0:
        filled = round(val * bar_size / val_len)
        # Clamp so an out-of-range index can never overflow the bar.
        filled = min(max(filled, 0), bar_size)
    else:
        filled = bar_size
    # Derive the blank part from the filled part so the bar is always exactly
    # bar_size wide (rounding both halves independently can be off by one).
    progr = "#" * filled + " " * (bar_size - filled)
    print("[%s] folder : %s/%s/ ----> file : %s" % (progr, folder, sub_folder, filename), end="\r")
      
def label_nama(nama_label):
    """Map a class folder name to its numeric label.

    Returns 1 when ``nama_label`` is exactly ``'LBP_yes'`` and 0 for any
    other value.
    """
    return 1 if nama_label == 'LBP_yes' else 0

# -------------------- Load Dataset ------------------------
 
dataset_dir ="brain_tumor_dataset/preprocess/"

imgs = []           # grayscale image matrix of every file that could be read
labels = []         # numeric class of every image (see label_nama)
descs = []          # "<folder> - <sub_folder>" description of every image
skipped_files = []  # paths that cv2.imread could not decode

# os.listdir returns entries in arbitrary order; sorting keeps the order of
# the loaded images (and of every table derived from them) reproducible.
for folder in sorted(os.listdir(dataset_dir)):
    folder_path = os.path.join(dataset_dir, folder)
    if not os.path.isdir(folder_path):
        # Stray files next to the dataset folders would break the nested listdir.
        continue
    for sub_folder in sorted(os.listdir(folder_path)):
        sub_folder_path = os.path.join(folder_path, sub_folder)
        if not os.path.isdir(sub_folder_path):
            continue
        sub_folder_files = sorted(os.listdir(sub_folder_path))
        # Index of the last file; kept >= 1 so the progress bar never divides
        # by zero when the sub-folder holds a single file.
        len_sub_folder = max(len(sub_folder_files) - 1, 1)
        # The class is encoded in the sub-folder name.
        nama_label = os.path.splitext(sub_folder)[0]
        for i, filename in enumerate(sub_folder_files):
            file_path = os.path.join(sub_folder_path, filename)
            img = cv2.imread(file_path)
            if img is None:
                # cv2.imread signals an unreadable / non-image file by
                # returning None instead of raising.
                skipped_files.append(file_path)
                continue

            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

            imgs.append(gray)
            labels.append(label_nama(nama_label))
            descs.append(normalize_desc(folder, sub_folder))

            print_progress(i, len_sub_folder, folder, sub_folder, filename)

if skipped_files:
    print("\nSkipped %d unreadable file(s): %s" % (len(skipped_files), skipped_files))


[##########] folder : data_LBP/LBP_no/ ----> file : image.jpg.jpgg
[##########] folder : data_LBP/LBP_yes/ ----> file : m3 (146).jpggg

In [4]:
# Optional visual check of the first loaded image. Left disabled because it
# opens an OpenCV window and waits for a key press.
# cv2.imshow("test img", imgs[0])

# cv2.waitKey(0)
# cv2.destroyAllWindows()

# Print the numeric class label collected for every loaded image.
print(labels)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [5]:
# ----------------- calculate greycomatrix() & greycoprops() for angle 0, 45, 90, 135 ----------------------------------
def calc_glcm_all_agls(img, label, props, dists=[5], agls=[0, np.pi/4, np.pi/2, 3*np.pi/4], lvl=256, sym=True, norm=True):
    """Compute GLCM texture features of one image and append its label.

    Parameters
    ----------
    img : array
        Image passed unchanged to ``greycomatrix``.
    label : object
        Value appended as the last element of the returned row.
    props : iterable of str
        Property names passed one by one to ``greycoprops``.
    dists, agls, lvl, sym, norm
        Forwarded to ``greycomatrix`` as ``distances``, ``angles``,
        ``levels``, ``symmetric`` and ``normed``.

    Returns
    -------
    list
        For every property, in order, the values of row 0 of the
        ``greycoprops`` result (per the scikit-image docs that row belongs to
        the first entry of ``dists``, with one value per angle), followed by
        ``label``.
    """
    cooccurrence = greycomatrix(img, distances=dists, angles=agls, levels=lvl, symmetric=sym, normed=norm)

    feature = []
    for prop_name in props:
        # Only row 0 is used; rows for any further distances are ignored.
        feature.extend(greycoprops(cooccurrence, prop_name)[0])
    feature.append(label)

    return feature


# ----------------- call calc_glcm_all_agls() for all properties ----------------------------------
# properties = ['dissimilarity', 'correlation', 'homogeneity', 'contrast', 'ASM', 'energy']
properties = ['contrast','correlation', 'ASM', 'homogeneity']

# One feature row per image: the per-angle values of every property, then the label.
glcm_all_agls = [
    calc_glcm_all_agls(image, target, props=properties)
    for image, target in zip(imgs, labels)
]

# Column names mirror the row layout: "<property>_<angle>" for every
# property/angle pair (property-major), followed by "label".
angles = ['0', '45', '90','135']
columns = [prop + "_" + degree for prop in properties for degree in angles]
columns.append("label")

In [6]:
# Assemble the per-image GLCM feature rows into a table whose columns are
# the "<property>_<angle>" names plus the trailing "label".
glcm_df = pd.DataFrame(data=glcm_all_agls, columns=columns)

# Persist the table as CSV, without the index column.
glcm_df.to_csv("glcm_brain_tumor_low.csv", index=False)

# Display the table as the cell output.
glcm_df

Unnamed: 0,contrast_0,contrast_45,contrast_90,contrast_135,correlation_0,correlation_45,correlation_90,correlation_135,ASM_0,ASM_45,ASM_90,ASM_135,homogeneity_0,homogeneity_45,homogeneity_90,homogeneity_135,label
0,7050.585175,7213.745777,6618.020475,7452.814988,0.531943,0.522253,0.558608,0.506418,0.248011,0.244061,0.252982,0.243665,0.523693,0.516824,0.533031,0.516009,0
1,12783.909209,12502.021891,11675.925277,13171.9593,0.266928,0.283589,0.33179,0.245142,0.021851,0.020762,0.024232,0.020709,0.181596,0.17743,0.197428,0.175022,0
2,8273.799345,8342.216536,8040.462102,8909.977949,0.491086,0.489384,0.505401,0.454631,0.174617,0.168504,0.175634,0.168113,0.448078,0.436015,0.451777,0.43572,0
3,7701.034983,7733.696608,7444.638011,8016.006939,0.503863,0.504421,0.520243,0.486328,0.220397,0.212881,0.22078,0.213062,0.496141,0.484588,0.497595,0.48415,0
4,11401.929469,10187.865127,9181.011107,12443.604467,0.310639,0.386448,0.449038,0.250612,0.019143,0.018388,0.021907,0.01843,0.198966,0.185534,0.207538,0.183324,0
5,14699.409944,13578.34682,12634.771051,14465.328057,0.110649,0.18124,0.242267,0.127721,0.004069,0.004718,0.007648,0.004711,0.093494,0.099302,0.127641,0.0972,0
6,11717.537627,11133.047948,10519.210588,11912.540232,0.318703,0.35541,0.389617,0.310276,0.059951,0.061858,0.07058,0.061794,0.273813,0.27474,0.299801,0.273891,0
7,12691.876542,12462.092517,11373.689062,13497.711651,0.2641,0.278145,0.340484,0.218149,0.022197,0.018953,0.022099,0.018569,0.1957,0.177228,0.200364,0.175392,0
8,8048.242689,6984.664468,6599.478011,8703.113107,0.483892,0.55425,0.576745,0.44458,0.188523,0.182975,0.190727,0.182977,0.469993,0.459873,0.47739,0.458028,0
9,8789.122079,8196.708204,7812.938497,9295.509953,0.463365,0.501182,0.521722,0.434312,0.1897,0.186599,0.195567,0.185951,0.463456,0.458318,0.474255,0.455714,0


In [7]:
# Split the table into a one-column label frame (y) and the feature columns (X).
y = glcm_df[['label']]
X = glcm_df.drop(columns='label')

In [8]:
# Average each GLCM property over its four angle columns.
cor = ['correlation_0','correlation_45','correlation_90','correlation_135']
homogen =['homogeneity_0','homogeneity_45','homogeneity_90','homogeneity_135']
cont = ['contrast_0','contrast_45','contrast_90','contrast_135']
ASM =  ['ASM_0','ASM_45','ASM_90','ASM_135']

# (name of the new mean column, per-angle source columns); the new columns
# are appended to X in exactly this order.
mean_specs = [
    ('correlation', cor),
    ('homogeneity', homogen),
    ('contrast', cont),
    ('ASM', ASM),
]
for mean_name, angle_cols in mean_specs:
    X[mean_name] = X[angle_cols].astype(float).mean(axis=1)

X


Unnamed: 0,contrast_0,contrast_45,contrast_90,contrast_135,correlation_0,correlation_45,correlation_90,correlation_135,ASM_0,ASM_45,ASM_90,ASM_135,homogeneity_0,homogeneity_45,homogeneity_90,homogeneity_135,correlation,homogeneity,contrast,ASM
0,7050.585175,7213.745777,6618.020475,7452.814988,0.531943,0.522253,0.558608,0.506418,0.248011,0.244061,0.252982,0.243665,0.523693,0.516824,0.533031,0.516009,0.529806,0.522389,7083.791604,0.24718
1,12783.909209,12502.021891,11675.925277,13171.9593,0.266928,0.283589,0.33179,0.245142,0.021851,0.020762,0.024232,0.020709,0.181596,0.17743,0.197428,0.175022,0.281862,0.182869,12533.453919,0.021889
2,8273.799345,8342.216536,8040.462102,8909.977949,0.491086,0.489384,0.505401,0.454631,0.174617,0.168504,0.175634,0.168113,0.448078,0.436015,0.451777,0.43572,0.485125,0.442897,8391.613983,0.171717
3,7701.034983,7733.696608,7444.638011,8016.006939,0.503863,0.504421,0.520243,0.486328,0.220397,0.212881,0.22078,0.213062,0.496141,0.484588,0.497595,0.48415,0.503714,0.490618,7723.844135,0.21678
4,11401.929469,10187.865127,9181.011107,12443.604467,0.310639,0.386448,0.449038,0.250612,0.019143,0.018388,0.021907,0.01843,0.198966,0.185534,0.207538,0.183324,0.349184,0.19384,10803.602543,0.019467
5,14699.409944,13578.34682,12634.771051,14465.328057,0.110649,0.18124,0.242267,0.127721,0.004069,0.004718,0.007648,0.004711,0.093494,0.099302,0.127641,0.0972,0.165469,0.104409,13844.463968,0.005286
6,11717.537627,11133.047948,10519.210588,11912.540232,0.318703,0.35541,0.389617,0.310276,0.059951,0.061858,0.07058,0.061794,0.273813,0.27474,0.299801,0.273891,0.343501,0.280561,11320.584099,0.063546
7,12691.876542,12462.092517,11373.689062,13497.711651,0.2641,0.278145,0.340484,0.218149,0.022197,0.018953,0.022099,0.018569,0.1957,0.177228,0.200364,0.175392,0.275219,0.187171,12506.342443,0.020454
8,8048.242689,6984.664468,6599.478011,8703.113107,0.483892,0.55425,0.576745,0.44458,0.188523,0.182975,0.190727,0.182977,0.469993,0.459873,0.47739,0.458028,0.514867,0.466321,7583.874569,0.1863
9,8789.122079,8196.708204,7812.938497,9295.509953,0.463365,0.501182,0.521722,0.434312,0.1897,0.186599,0.195567,0.185951,0.463456,0.458318,0.474255,0.455714,0.480145,0.462936,8523.569683,0.189454


In [9]:
# Keep only the angle-averaged features. Selecting the columns by name
# (rather than by the positional slice 16:) stays correct when the number of
# per-angle columns changes, e.g. if more GLCM properties are extracted.
dfx = X[['correlation', 'homogeneity', 'contrast', 'ASM']]

# Independent copy, so columns added to df later never touch X.
df = dfx.copy()
df

Unnamed: 0,correlation,homogeneity,contrast,ASM
0,0.529806,0.522389,7083.791604,0.24718
1,0.281862,0.182869,12533.453919,0.021889
2,0.485125,0.442897,8391.613983,0.171717
3,0.503714,0.490618,7723.844135,0.21678
4,0.349184,0.19384,10803.602543,0.019467
5,0.165469,0.104409,13844.463968,0.005286
6,0.343501,0.280561,11320.584099,0.063546
7,0.275219,0.187171,12506.342443,0.020454
8,0.514867,0.466321,7583.874569,0.1863
9,0.480145,0.462936,8523.569683,0.189454


In [10]:
# Attach the class label to the averaged features. y is a one-column frame,
# so its 'label' column is assigned as a Series (rows are matched on the index).
df['label'] = y['label']
df

Unnamed: 0,correlation,homogeneity,contrast,ASM,label
0,0.529806,0.522389,7083.791604,0.24718,0
1,0.281862,0.182869,12533.453919,0.021889,0
2,0.485125,0.442897,8391.613983,0.171717,0
3,0.503714,0.490618,7723.844135,0.21678,0
4,0.349184,0.19384,10803.602543,0.019467,0
5,0.165469,0.104409,13844.463968,0.005286,0
6,0.343501,0.280561,11320.584099,0.063546,0
7,0.275219,0.187171,12506.342443,0.020454,0
8,0.514867,0.466321,7583.874569,0.1863,0
9,0.480145,0.462936,8523.569683,0.189454,0


In [11]:
# Write the averaged GLCM features plus label to CSV, without the index column.
df.to_csv("dataset_mri.csv", index=False)
