In [1]:
%reset

Once deleted, variables cannot be recovered. Proceed (y/[n])?  y


In [2]:
import warnings
warnings.filterwarnings('ignore')

# all import statements
import numpy as np
import pandas as pd
import pydicom as pyd
import os
import matplotlib.pyplot as plt
import mudicom

from os.path import dirname, join
from pydicom.data import get_testdata_files
from pydicom.filereader import read_dicomdir
from PIL import Image

from scipy.misc import imresize
from scipy.signal import convolve2d
import scipy
from skimage.segmentation import slic
from skimage.segmentation import mark_boundaries

from numpy import newaxis

from skimage.measure import shannon_entropy
from skimage import io, color, img_as_ubyte
from skimage.feature import greycomatrix, greycoprops
from sklearn.metrics.cluster import entropy


import math
from skimage.measure import label, regionprops

from skimage.filters import threshold_otsu
from skimage.segmentation import clear_border
from skimage.measure import label, regionprops
from skimage.morphology import closing, square
from skimage.color import label2rgb

import matplotlib.patches as mpatches

import statistics

from scipy import ndimage as ndi
from skimage.morphology import watershed
from skimage.feature import peak_local_max

import cv2
from skimage import img_as_float
from skimage import exposure

In [3]:
import pickle

from tqdm import tqdm
import os
import sys

In [4]:
def load_pickle(fileName):
    with open(fileName, "rb") as fp:
        file = pickle.load(fp)
    return file 

def write_pickle(file, fileName):
    with open(fileName, "wb") as fp:
        pickle.dump(file, fp)

### GLCM

In [5]:

def s_entropy(dcm):
    return shannon_entropy(dcm)

def entropy_simple(dcm):
    return entropy(dcm)

class glcm:
    def __init__(self, image):
        distance = [1, 2, 3]
        angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]
        self.image = img_as_ubyte(image.astype('int64'))
        self.glcm_mat = greycomatrix(self.image, distances = distance, angles = angles, symmetric = True, normed = True)
        self.properties = ['correlation', 'homogeneity', 'contrast', 'energy']
            
    def correlation(self):
        return greycoprops(self.glcm_mat, 'correlation').flatten()
    
    def homogeneity(self):
        return greycoprops(self.glcm_mat, 'homogeneity').flatten()
    
    def contrast(self):
        return greycoprops(self.glcm_mat, 'contrast').flatten()
    
    def energy(self):
        return greycoprops(self.glcm_mat, 'energy').flatten()
    
    def glcm_all(self):
        return np.hstack([greycoprops(self.glcm_mat, props).ravel() for props in self.properties])
    

### Region Props

In [6]:
# thresh_ada = threshold_otsu(image_adahist)
# bw_ada = closing(image_adahist > thresh_ada, square(3))
# bw_clear_ada = clear_border(bw_ada)

class region_props:
    def __init__(self, image):
        self.image = image
        self.thresh = threshold_otsu(self.image)
        self.bw = closing(self.image > self.thresh, square(3))
        self.bw_clear = clear_border(self.bw)
        self.bw_label = label(self.bw_clear)
        self.regions = regionprops(self.bw_label)
#         self.idx = 0
        
    def plot_image_bw(self):
        plt.imshow(self.bw, cmap = plt.cm.bone)
    
    def plot_image(self):
        plt.imshow(self.bw_clear, cmap=plt.cm.bone)
        
    def plot_image_with_labels(self):
        plt.imshow(self.bw_label, cmap=plt.cm.bone)
        
    def max_area(self):
        lista = []
        for e in self.regions:
            lista.append(e.area)
        idx = lista.index(max(lista))
        return max(lista)
    
    def eccentricity(self):
        
        lista = []
        for e in self.regions:
            lista.append(e.area)
        idx = lista.index(max(lista))
        return self.regions[idx].eccentricity
    
    def euler_number(self):
        lista = []
        for e in self.regions:
            lista.append(e.area)
        idx = lista.index(max(lista))
        return self.regions[idx].euler_number
    
    def solidity(self):
        lista = []
        for e in self.regions:
            lista.append(e.area)
        idx = lista.index(max(lista))
        return self.regions[idx].solidity
    
    def perimeter(self):
        lista = []
        for e in self.regions:
            lista.append(e.perimeter)
        return max(lista)
    
    def mean_area(self):
        lista = []
        for e in self.regions:
            lista.append(e.area)
        return statistics.mean(lista)
    
    def std_area(self):
        lista = []
        for e in self.regions:
            lista.append(e.area)
        if len(lista)>2:
            return statistics.stdev(lista)
        else:
            return -1
    
    def thresh_img(self):
        return self.thresh
        

### Moments

In [7]:
class moments:
    
    def __init__(self, image):
        self.image = image
        self.moment = cv2.moments(self.image)
        self.hu = cv2.HuMoments(self.moment)
        
    def get_moments(self):
#         keys = [key for key in self.moment.keys()]
        values = [value for value in self.moment.values()]
        return values
    
    def get_HuMoments(self):
        moments_hu = []
        for m in range(len(self.hu)):
            moments_hu.append(self.hu[m][0])
        return moments_hu

#### DataFrame dcm_1

In [8]:
dcm_4 = load_pickle("dcm-4")
pat_4 = load_pickle("pat-4")

In [9]:
len(dcm_4), len(pat_4)

(4000, 4000)

In [10]:
data_dcm_4 = dict()
for dc in tqdm(range(len(dcm_4))):
    data_dcm_4[pat_4[dc]] = [s_entropy(dcm_4[dc]), entropy_simple(dcm_4[dc]), moments(dcm_4[dc]).get_moments(), moments(dcm_4[dc]).get_HuMoments(),
                      region_props(dcm_4[dc]).max_area(), region_props(dcm_4[dc]).eccentricity(), region_props(dcm_4[dc]).euler_number(),
                      region_props(dcm_4[dc]).solidity(), region_props(dcm_4[dc]).perimeter(), region_props(dcm_4[dc]).mean_area(), 
                      region_props(dcm_4[dc]).std_area(), region_props(dcm_4[dc]).thresh_img(), glcm(dcm_4[dc]).correlation(), glcm(dcm_4[dc]).homogeneity(),
                      glcm(dcm_4[dc]).contrast(), glcm(dcm_4[dc]).energy()]

100%|██████████| 4000/4000 [1:04:03<00:00,  1.03it/s]


In [11]:
df_dcm_4 = pd.DataFrame(data_dcm_4).T

In [12]:
df_dcm_4.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
a25056f9-2c4e-42f3-a035-26552647dcd4,7.57027,5.24731,"[128892357.0, 65713318521.0, 76088608635.0, 42...","[0.001099271230659824, 5.594577377680552e-09, ...",698,0.920496,1,0.545312,240.711,12,70.8378,121,"[0.9984929396892397, 0.9976594855013821, 0.998...","[0.42121093161166145, 0.3575495515004329, 0.43...","[14.613001550281036, 22.667833380632555, 10.55...","[0.07580800979368561, 0.07451888797508278, 0.0..."
b3332044-d191-4d1d-8cca-b11c30c79fa8,7.38902,5.12168,"[115844069.0, 60297790826.0, 68209896500.0, 37...","[0.0011334693283760783, 2.4447627594764187e-08...",817,0.918903,0,0.615674,241.196,11,67.5722,110,"[0.998889131384224, 0.9982132804883765, 0.9990...","[0.45703023708527035, 0.40038713050653396, 0.4...","[11.886168896627572, 19.099432504975965, 9.878...","[0.12717272531287524, 0.1260866277257114, 0.12..."
5620c301-4fee-478e-bdb0-124a00010443,7.84394,5.437,"[130198557.0, 69209327160.0, 75402798161.0, 45...","[0.0011880252276057833, 1.3954073768040174e-08...",73734,0.934456,-10,0.571055,3305.62,1078,8811.07,119,"[0.9988293155916104, 0.9978037814432585, 0.998...","[0.4440672291807069, 0.36771548306690616, 0.46...","[10.07103227333822, 18.880716157889545, 10.841...","[0.026992073835798006, 0.023932954659757547, 0..."
82e868a8-8b23-466e-8e4a-586356ed32f8,7.23362,5.01396,"[119480999.0, 59343300356.0, 66960008667.0, 37...","[0.001252005741187496, 1.2299178840210426e-08,...",811,0.969666,0,0.787379,259.581,42,142.583,86,"[0.9967030369867836, 0.9930968045618012, 0.995...","[0.6251839628472105, 0.5584210221297989, 0.669...","[13.44803503787879, 28.103667456898, 16.694222...","[0.04672918406348799, 0.04234998443956818, 0.0..."
b65d0b26-25c6-4d63-b070-518f366f95e8,7.34651,5.09221,"[177009846.0, 91796122663.0, 91656267226.0, 61...","[0.0009570636653469728, 4.207825145197353e-09,...",1168,0.951322,0,0.40882,416.392,25,114.298,154,"[0.9973147690039905, 0.9960315492054174, 0.998...","[0.500387670875001, 0.43884166188381124, 0.556...","[13.23560071480939, 19.55008413527001, 8.51788...","[0.037159382570885885, 0.03340332503730002, 0...."


In [13]:
df_dcm_4.to_pickle('df_dcm_4')

In [14]:
df_dcm_4.to_csv('df_dcm_4.csv')