In [1]:
from skimage.feature import graycomatrix, graycoprops
import numpy as np
import matplotlib.pyplot as plt
import cv2
import pandas as pd
import glob

In [2]:
images_path = '/Users/joanna/datasci_281/final_project/HAM10000_images_preprocessed'
segmentation_path = '/Users/joanna/datasci_281/final_project/HAM10000_segmentations_lesion_tschandl'
metadata = pd.read_csv('HAM10000_metadata.csv')

In [3]:
# get paths for image and segmentation files
img_files = sorted(glob.glob('/'.join([images_path, '*.jpg'])))
segmentation_files = sorted(glob.glob('/'.join([segmentation_path, '*.png'])))
# get the image ID 
img_files_df = pd.DataFrame({'img_file_path': img_files})
img_files_df['image_id'] = img_files_df['img_file_path'].apply(lambda x: x.split('/')[-1].replace('.jpg', ''))
# get the segmentation ID
segmentation_files_df = pd.DataFrame({'segmentation_file_path': segmentation_files})
segmentation_files_df['image_id'] = segmentation_files_df['segmentation_file_path'].apply(lambda x: x.split('/')[-1].replace('_segmentation.png', ''))

In [4]:
# merge the dataframes to make one metadata file
metadata = pd.merge(metadata, img_files_df, left_on= 'image_id', right_on = 'image_id')
metadata = pd.merge(metadata, segmentation_files_df, left_on = 'image_id', right_on = 'image_id')

metadata.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset,img_file_path,segmentation_file_path
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,vidir_modern,/Users/joanna/datasci_281/final_project/HAM100...,/Users/joanna/datasci_281/final_project/HAM100...
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,vidir_modern,/Users/joanna/datasci_281/final_project/HAM100...,/Users/joanna/datasci_281/final_project/HAM100...
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,vidir_modern,/Users/joanna/datasci_281/final_project/HAM100...,/Users/joanna/datasci_281/final_project/HAM100...
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,vidir_modern,/Users/joanna/datasci_281/final_project/HAM100...,/Users/joanna/datasci_281/final_project/HAM100...
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,vidir_modern,/Users/joanna/datasci_281/final_project/HAM100...,/Users/joanna/datasci_281/final_project/HAM100...


In [5]:
img_info = metadata[['image_id','img_file_path', 'segmentation_file_path']]
feature_df = pd.DataFrame()
for i in range(len(metadata)):
    img = cv2.imread(img_info['img_file_path'].iloc[i], cv2.IMREAD_GRAYSCALE)
    segmentation = cv2.imread(img_info['segmentation_file_path'].iloc[i], cv2.IMREAD_GRAYSCALE)
    img_filtered = cv2.bitwise_and(img, segmentation)

    distances = [1]
    angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]

    glcm = graycomatrix(img_filtered, distances=distances, angles=angles, symmetric=True, normed=True)
    contrast = graycoprops(glcm, 'contrast').mean()
    dissimilarity = graycoprops(glcm, 'dissimilarity').mean()
    homogeneity = graycoprops(glcm, 'homogeneity').mean()
    energy = graycoprops(glcm, 'energy').mean()
    correlation = graycoprops(glcm, 'correlation').mean()
    asm = graycoprops(glcm, 'ASM').mean()

    feature = pd.DataFrame({'image_id': [img_info['image_id'].iloc[i]],
                            'contrast': [contrast],
                            'dissimilarity': [dissimilarity],
                            'homogeneity': [homogeneity],
                            'energy': [energy],
                            'correlation': [correlation],
                            'asm': [asm]})
    
    feature_df = pd.concat([feature_df, feature], ignore_index= True)

KeyboardInterrupt: 

In [None]:
feature_df

In [54]:
feature_df.set_index('image_id').corr()

Unnamed: 0,contrast,dissimilarity,homogeneity,energy,correlation,asm
contrast,1.0,0.835423,-0.752026,-0.737164,0.405116,-0.793849
dissimilarity,0.835423,1.0,-0.956271,-0.912854,0.297362,-0.914154
homogeneity,-0.752026,-0.956271,1.0,0.988287,-0.33422,0.969656
energy,-0.737164,-0.912854,0.988287,1.0,-0.365781,0.982439
correlation,0.405116,0.297362,-0.33422,-0.365781,1.0,-0.438754
asm,-0.793849,-0.914154,0.969656,0.982439,-0.438754,1.0


Probably only want to take contrast, dissimilarity, homogeneity, and correlation. May even want to not take dissimiliarty.

In [56]:
feature_df.to_csv('glcm.csv', index = False)