 <h1><center>Make DataFrame</center></h1>

# Import Data and Modules

In [1]:
# all import statements
import numpy as np
import pandas as pd
import pydicom as pyd
import os
import matplotlib.pyplot as plt
import mudicom

from os.path import dirname, join
from pydicom.data import get_testdata_files
from pydicom.filereader import read_dicomdir
from PIL import Image

from scipy.misc import imresize
from scipy.signal import convolve2d
import scipy
from skimage.segmentation import slic
from skimage.segmentation import mark_boundaries

from numpy import newaxis

import pickle

In [17]:
with open("DCM_NP", "rb") as fp:
    dcm = pickle.load(fp)

with open("DCM-ADAHIST", "rb") as fp:
    dcm_adahist = pickle.load(fp)

with open("DCM-EQ", "rb") as fp:
    dcm_eq = pickle.load(fp)
    
with open("dcm_target", "rb") as fp:
    dcm_target = pickle.load(fp)

def load_pickle(fileName):
    with open(fileName, "rb") as fp:
        file = pickle.load(fp)
    return file 

dcm_x = load_pickle("dcm_x")

dcm_y = load_pickle("dcm_y")

dcm_width = load_pickle("dcm_width")

dcm_height = load_pickle("dcm_height")

pat_id = load_pickle("PAT_IDS")

# Plot Helper Methods

In [3]:
def show(img, title=None):
    plt.imshow(img, cmap=plt.cm.bone)
    if title is not None: plt.title = title

def plots(ims, figsize=(12,6), rows=2, titles=None):
    f = plt.figure(figsize=figsize)
    cols = len(ims)//rows
    for i in range(len(ims)):
        sp = f.add_subplot(rows, cols, i+1)
        sp.axis('Off')
        if titles is not None: sp.set_title(titles[i], fontsize=16)
        plt.imshow(ims[i], cmap=plt.cm.bone)

# GLCM Class and functions

In [4]:
import cv2

In [5]:
from skimage.measure import shannon_entropy
from skimage import io, color, img_as_ubyte
from skimage.feature import greycomatrix, greycoprops
from sklearn.metrics.cluster import entropy



def s_entropy(dcm):
    return shannon_entropy(dcm)

def entropy_simple(dcm):
    return entropy(dcm)

class glcm:
    def __init__(self, image):
        distance = [1, 2, 3]
        angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]
        self.image = img_as_ubyte(image.astype('int64'))
        self.glcm_mat = greycomatrix(self.image, distances = distance, angles = angles, symmetric = True, normed = True)
        self.properties = ['correlation', 'homogeneity', 'contrast', 'energy']
            
    def correlation(self):
        return greycoprops(self.glcm_mat, 'correlation').flatten()
    
    def homogeneity(self):
        return greycoprops(self.glcm_mat, 'homogeneity').flatten()
    
    def contrast(self):
        return greycoprops(self.glcm_mat, 'contrast').flatten()
    
    def energy(self):
        return greycoprops(self.glcm_mat, 'energy').flatten()
    
    def glcm_all(self):
        return np.hstack([greycoprops(self.glcm_mat, props).ravel() for props in self.properties])
    

# Mean from List

In [6]:
def mean_from_list(listname):
    listname = pd.Series(listname).fillna(0).tolist()
    counts = 0
    total = 0
    for e in listname:
        if e != 0:
            counts = counts + 1
            total = total + e
    return total/counts

# Region Properties Class and fuctions

In [7]:
import math
from skimage.measure import label, regionprops

from skimage.filters import threshold_otsu
from skimage.segmentation import clear_border
from skimage.measure import label, regionprops
from skimage.morphology import closing, square
from skimage.color import label2rgb

import statistics

class region_props:
    def __init__(self, image):
        self.image = image
        self.thresh = threshold_otsu(self.image)
        self.bw = closing(self.image > self.thresh, square(3))
        self.bw_clear = clear_border(self.bw)
        self.bw_label = label(self.bw_clear)
        self.regions = regionprops(self.bw_label)
#         self.idx = 0
        
    def plot_image_bw(self):
        plt.imshow(self.bw, cmap = plt.cm.bone)
    
    def plot_image(self):
        plt.imshow(self.bw_clear, cmap=plt.cm.bone)
        
    def plot_image_with_labels(self):
        plt.imshow(self.bw_label, cmap=plt.cm.bone)
        
    def max_area(self):
        lista = []
        for e in self.regions:
            lista.append(e.area)
        idx = lista.index(max(lista))
        return max(lista)
    
    def eccentricity(self):
        
        lista = []
        for e in self.regions:
            lista.append(e.area)
        idx = lista.index(max(lista))
        return self.regions[idx].eccentricity
    
    def euler_number(self):
        lista = []
        for e in self.regions:
            lista.append(e.area)
        idx = lista.index(max(lista))
        return self.regions[idx].euler_number
    
    def solidity(self):
        lista = []
        for e in self.regions:
            lista.append(e.area)
        idx = lista.index(max(lista))
        return self.regions[idx].solidity
    
    def perimeter(self):
        lista = []
        for e in self.regions:
            lista.append(e.perimeter)
        return max(lista)
    
    def mean_area(self):
        lista = []
        for e in self.regions:
            lista.append(e.area)
        return statistics.mean(lista)
    
    def std_area(self):
        lista = []
        for e in self.regions:
            lista.append(e.area)
        return statistics.stdev(lista)
    
    def thresh_img(self):
        return self.thresh
        

# WaterShed Segmentation

In [8]:
from scipy import ndimage as ndi
from skimage.morphology import watershed
from skimage.feature import peak_local_max

def water_seg(image, footprint = np.ones((3,3))):
    distance = ndi.distance_transform_edt(image)
    local_maxi = peak_local_max(distance, indices=False, footprint=footprint, labels=image)
    markers = ndi.label(local_maxi)[0]
    labels = watershed(-distance, markers, mask = image)
    plt.imshow(labels, cmap=plt.cm.gray)

# Moment Invariants

In [28]:
class moments:
    
    def __init__(self, image):
        self.image = image
        self.moment = cv2.moments(self.image)
        self.hu = cv2.HuMoments(self.moment)
        
    def get_moments(self):
#         keys = [key for key in self.moment.keys()]
        values = [value for value in self.moment.values()]
        return values
    
    def get_HuMoments(self):
        moments_hu = []
        for m in range(len(self.hu)):
            moments_hu.append(self.hu[m][0])
        return moments_hu

# Generating DataFrame

In [10]:
pat_id

['7be6b4de-afe9-43c0-a581-0f49608c8976',
 '2dcdd159-2889-48d3-a0ce-5c7b1086c49d',
 'd8e66874-305e-4c80-9b75-5e764eb718ff',
 '22f2d3ec-f7ea-4778-850d-bb111590202f',
 'cdaa07d4-4234-4cd2-b9bf-abbf5aed1bb4',
 '46c4f908-9292-437f-af42-4031fca621f2',
 '79fa156c-c4f5-478d-a14d-80cc4db5cb8d',
 '1caa4dac-4bac-419b-91d4-cac2d8408ccd',
 'e8a6ccf4-845e-4663-b561-008bdf13c7fd',
 '6a975635-1118-4c26-b613-43fd34ce8b16',
 'ebc9da35-fa1b-4cc4-b963-16731802af49',
 'ce30917f-c3bf-47fa-ab69-a1062f387f3a',
 '79cb4053-b062-4be9-8922-9585b170d8fc',
 '94a021aa-e34c-439f-b30d-0c5a1a0c8539',
 '19d2c68d-abe7-4575-9ee5-d84b9ff656be',
 '60e598ff-ee2b-4fa8-9e1e-6562dd19b00c',
 'fa2d2cf7-acfc-4c2a-a231-9e60abcf5d66',
 'c1e3eb82-c55a-471f-a57f-fe1a823469da',
 'c3b14740-9eb7-4b20-a9ce-e404cce4b8c7',
 'e6722c12-2875-4795-b377-badc4514d224',
 '11a2e0cb-ee1f-4d53-a211-b8faff6bf888',
 '505e3473-fe7f-4c3e-8684-b7f9ecf1d052',
 'ac710523-7dfb-4d63-821c-34b2d6ef03d7',
 '3a483825-5dc2-4318-b0a6-7378f155df2c',
 'd7571c7c-0a9d-

In [25]:
xx = dict()
for dc in range(len(dcm)):
    xx[pat_id[dc]] = [entropy_simple(dcm[dc])]

In [24]:
dcm[1]

array([255., 254., 253., ..., 255., 255., 139.])

In [69]:
import warnings
warnings.filterwarnings('ignore')

In [70]:
data_500 = dict()

for dc in range(len(dcm)):
    data_500[pat_id[dc]] = [s_entropy(dcm[dc]), entropy_simple(dcm[dc]), moments(dcm[dc]).get_moments(), moments(dcm[dc]).get_HuMoments(),
                      region_props(dcm[dc]).max_area(), region_props(dcm[dc]).eccentricity(), region_props(dcm[dc]).euler_number(),
                      region_props(dcm[dc]).solidity(), region_props(dcm[dc]).perimeter(), region_props(dcm[dc]).mean_area(), 
                      region_props(dcm[dc]).std_area(), region_props(dcm[dc]).thresh_img(), glcm(dcm[dc]).correlation(), glcm(dcm[dc]).homogeneity(),
                      glcm(dcm[dc]).contrast(), glcm(dcm[dc]).energy()
                     ]

In [30]:
data_500

{}

In [74]:
d_vals = [d for d in data_500.values()]

In [75]:
d_vals[0]

[7.366346218486498,
 5.105962112372331,
 [92240919.0,
  50704447353.0,
  63088033389.0,
  33806797822735.0,
  34793841909223.0,
  48507833852159.0,
  2.506428889920707e+16,
  2.302898984610215e+16,
  2.6812117595414812e+16,
  3.9605522711627576e+16,
  5934775197145.715,
  114613837169.02734,
  5358871330530.406,
  -43808662651692.0,
  -219122549235131.5,
  -9215688755842.0,
  -901718424475872.0,
  0.0006975205877345016,
  1.3470689016678442e-05,
  0.0006298339795352089,
  -5.361063392948949e-07,
  -2.6815013428143215e-06,
  -1.1277653468348368e-07,
  -1.1034734556131197e-05],
 [0.0013273545672697106,
  5.307314779860795e-09,
  8.980594245722594e-12,
  1.885561762196642e-10,
  7.592121246281127e-21,
  -1.2226160315336935e-14,
  1.601241494972407e-21],
 10718,
 0.7122170560070662,
 -2,
 0.8252232830304896,
 763.7594513539336,
 75,
 828.5734729038819,
 100.17578125,
 array([0.99882676, 0.99812255, 0.99888047, 0.99813966, 0.9980681 ,
        0.99812255, 0.99786396, 0.99813966, 0.99743181, 

In [77]:
d_vals[0][0], d_vals[0][1] # s_entropy, entropy

(7.366346218486498, 5.105962112372331)

In [82]:
d_vals[0][2], d_vals[0][3] # moments, humoments


([92240919.0,
  50704447353.0,
  63088033389.0,
  33806797822735.0,
  34793841909223.0,
  48507833852159.0,
  2.506428889920707e+16,
  2.302898984610215e+16,
  2.6812117595414812e+16,
  3.9605522711627576e+16,
  5934775197145.715,
  114613837169.02734,
  5358871330530.406,
  -43808662651692.0,
  -219122549235131.5,
  -9215688755842.0,
  -901718424475872.0,
  0.0006975205877345016,
  1.3470689016678442e-05,
  0.0006298339795352089,
  -5.361063392948949e-07,
  -2.6815013428143215e-06,
  -1.1277653468348368e-07,
  -1.1034734556131197e-05],
 [0.0013273545672697106,
  5.307314779860795e-09,
  8.980594245722594e-12,
  1.885561762196642e-10,
  7.592121246281127e-21,
  -1.2226160315336935e-14,
  1.601241494972407e-21])

In [96]:
d_vals[0][4], d_vals[0][5], d_vals[0][6], d_vals[0][7], d_vals[0][8], d_vals[0][9] #mean_area perimeter solidity euler_number eccentricity max_area 

(10718, 0.7122170560070662, -2, 0.8252232830304896, 763.7594513539336, 75)

In [95]:
d_vals[0][10], d_vals[0][11] # threshold std_dev

(828.5734729038819, 100.17578125)

In [91]:
 d_vals[0][12] # correlation

array([0.99882676, 0.99812255, 0.99888047, 0.99813966, 0.9980681 ,
       0.99812255, 0.99786396, 0.99813966, 0.99743181, 0.99711435,
       0.99717069, 0.99717132])

In [92]:
d_vals[0][13] #homogenity

array([0.47846904, 0.42590105, 0.48876651, 0.42858127, 0.3989946 ,
       0.42590105, 0.40800591, 0.42858127, 0.36726313, 0.36998612,
       0.38287398, 0.37416103])

In [93]:
d_vals[0][14] #contrast

array([12.4306087 , 19.87809702, 11.85324261, 19.6969181 , 20.47072224,
       19.87809702, 22.60002312, 19.6969181 , 27.21581553, 30.53421307,
       29.91432553, 29.9315193 ])

In [94]:
d_vals[0][15] #energy

array([0.09403051, 0.0912895 , 0.09347248, 0.09152096, 0.09102734,
       0.0912895 , 0.09063662, 0.09152096, 0.08918036, 0.08805008,
       0.08864295, 0.08841407])

In [97]:
lista = []

for i in range(len(d_vals)):
    for j in range(len(d_vals[i])):
        lista.append(d_vals[i][j])

In [98]:
lista

[7.366346218486498,
 5.105962112372331,
 [92240919.0,
  50704447353.0,
  63088033389.0,
  33806797822735.0,
  34793841909223.0,
  48507833852159.0,
  2.506428889920707e+16,
  2.302898984610215e+16,
  2.6812117595414812e+16,
  3.9605522711627576e+16,
  5934775197145.715,
  114613837169.02734,
  5358871330530.406,
  -43808662651692.0,
  -219122549235131.5,
  -9215688755842.0,
  -901718424475872.0,
  0.0006975205877345016,
  1.3470689016678442e-05,
  0.0006298339795352089,
  -5.361063392948949e-07,
  -2.6815013428143215e-06,
  -1.1277653468348368e-07,
  -1.1034734556131197e-05],
 [0.0013273545672697106,
  5.307314779860795e-09,
  8.980594245722594e-12,
  1.885561762196642e-10,
  7.592121246281127e-21,
  -1.2226160315336935e-14,
  1.601241494972407e-21],
 10718,
 0.7122170560070662,
 -2,
 0.8252232830304896,
 763.7594513539336,
 75,
 828.5734729038819,
 100.17578125,
 array([0.99882676, 0.99812255, 0.99888047, 0.99813966, 0.9980681 ,
        0.99812255, 0.99786396, 0.99813966, 0.99743181, 

In [99]:
sum(d_vals[1], [])

ValueError: operands could not be broadcast together with shapes (0,) (24,) 

In [100]:
# for sublist in l:
#     for item in sublist:
#         flat_list.append(item)

In [105]:
flat_list = []
for i in range(3):
    for sublist in d_vals[i]:
        for item in sublist:
            flat_list.append(item)

IndexError: invalid index to scalar variable.

In [106]:
from numpy import array

In [107]:
x = array(d_vals[1])

In [108]:
x

array([7.804127415277591, 5.4094089146302355,
       list([118773763.0, 57028122636.0, 68825079276.0, 35064274011868.0, 32837069228523.0, 50039081493888.0, 2.4277340084564708e+16, 1.974154927838267e+16, 2.3865428296288068e+16, 3.9649000697850696e+16, 7682748923817.574, -208655367944.90625, 10157449240242.18, 63952030931624.0, -376555347119155.5, 81443010363429.0, -1118573948397512.0, 0.0005445974561204557, -1.4790693242153885e-05, 0.0007200184558627777, 4.159614187951666e-07, -2.4492184870572467e-06, 5.297275105765137e-07, -7.27550946896762e-06]),
       list([0.0012646159119832335, 3.16475855769297e-08, 1.3816528240260898e-12, 9.546466136982774e-11, 2.493175479306957e-22, 1.6976833110507573e-14, 1.067662248501875e-21]),
       780, 0.9323808919267352, -2, 0.6483790523690773,
       309.90663761154804, 16, 83.03613671167511, 116.044921875,
       array([0.9985892 , 0.9973359 , 0.99818558, 0.99696743, 0.99625657,
       0.9973359 , 0.99469479, 0.99696743, 0.99387677, 0.9931622 ,
       

In [109]:
pd.DataFrame(x)

Unnamed: 0,0
0,7.80413
1,5.40941
2,"[118773763.0, 57028122636.0, 68825079276.0, 35..."
3,"[0.0012646159119832335, 3.16475855769297e-08, ..."
4,780
5,0.932381
6,-2
7,0.648379
8,309.907
9,16


In [113]:
pd.DataFrame(data_500).T.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
7be6b4de-afe9-43c0-a581-0f49608c8976,7.36635,5.10596,"[92240919.0, 50704447353.0, 63088033389.0, 338...","[0.0013273545672697106, 5.307314779860795e-09,...",10718,0.712217,-2,0.825223,763.759,75,828.573,100.176,"[0.9988267630814048, 0.9981225489241269, 0.998...","[0.4784690393800168, 0.4259010474717295, 0.488...","[12.430608695320135, 19.878097023589405, 11.85...","[0.09403051421430261, 0.09128950002810914, 0.0..."
2dcdd159-2889-48d3-a0ce-5c7b1086c49d,7.80413,5.40941,"[118773763.0, 57028122636.0, 68825079276.0, 35...","[0.0012646159119832335, 3.16475855769297e-08, ...",780,0.932381,-2,0.648379,309.907,16,83.0361,116.045,"[0.9985891998835106, 0.9973358959175828, 0.998...","[0.43372085077405403, 0.35326013611833573, 0.4...","[13.493244249450141, 25.448846615812855, 17.34...","[0.03328544296315335, 0.030327285573110596, 0...."
d8e66874-305e-4c80-9b75-5e764eb718ff,6.8283,4.73302,"[170334063.0, 85096727399.0, 91221672211.0, 56...","[0.0009818426448239955, 2.1278358422286364e-09...",912,0.935675,1,0.660391,200.018,17,98.6053,153.609,"[0.9957137994646331, 0.9867863821491449, 0.990...","[0.5852036010239059, 0.5117796098282055, 0.607...","[8.462962220491203, 25.907455025135455, 18.828...","[0.0510614483913255, 0.045511998024517275, 0.0..."
22f2d3ec-f7ea-4778-850d-bb111590202f,7.69593,5.33441,"[140242338.0, 62310789492.0, 77996053536.0, 36...","[0.0010279676066873146, 1.5714761019644863e-08...",341,0.879888,1,0.669941,100.142,7,34.5398,114.053,"[0.9985476796688767, 0.9971176416958397, 0.998...","[0.39719662527615573, 0.3305116441450096, 0.40...","[13.381106618096288, 26.531738728692645, 15.55...","[0.040514058469510505, 0.03853141907174066, 0...."
cdaa07d4-4234-4cd2-b9bf-abbf5aed1bb4,7.54643,5.23079,"[130111108.0, 68673721574.0, 76245952093.0, 47...","[0.001331840596963884, 2.515513779530961e-09, ...",568,0.915212,1,0.690158,260.332,42,123.288,112.061,"[0.9928517884301251, 0.9871516099477623, 0.993...","[0.608064329767518, 0.5248951911077396, 0.6166...","[38.36338816593354, 68.86618717684847, 36.0202...","[0.041865531996016635, 0.037771669350363746, 0..."
