# Extract Feature Steps

## Imports

In [1]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from PIL import Image
import os, os.path
import fnmatch
import numpy as np
from scipy import misc
import pandas as pd
from skimage import color
from skimage import data
from skimage import measure
from skimage.filters import threshold_adaptive
from skimage.filters import threshold_otsu
from skimage.feature.texture import greycomatrix
from skimage.feature.texture import greycoprops
import skimage
from scipy.stats import kurtosis
from scipy.stats import skew
#%pylab inline
%matplotlib inline

## Extract Statisitcal Features
stattisitcal features are:
* Mean
* Variance
* Skew
* Kurtosis

In [2]:
def getFlat(img, removeblack):
    data = img.flatten()
    if removeblack:
        index = np.argwhere(data==0)
        data = np.delete(data, index)
    return [np.mean(data), np.var(data), skew(data), kurtosis(data)]

## Convert Image to BitMap

Source Code http://tonysyu.github.io/scikit-image/auto_examples/plot_otsu.html

The threshould is picked by https://en.wikipedia.org/wiki/Otsu's_method

The image is then converted to bit map

In [3]:
def toBitMap(gray):
    thresh = threshold_otsu(gray)
    bitmap = np.dot((gray > thresh).astype(float),1)
    return bitmap

Count ratio between black and white ratios

In [4]:
def getBlackandWhiteCount(bitmaparray):
    total = len(bitmaparray.flatten())
    white = np.count_nonzero(bitmaparray.flatten())
    black = total-white
    return [black/total, white/total]

## GCLM Texture Features
Get Value of Texture src:http://scikit-image.org/docs/0.7.0/api/skimage.feature.texture.html

In [5]:
def GetCoMatrix(gray):
    newGray = (gray).astype(int)
    coMatrix = greycomatrix(newGray, [1], [0,np.pi/4,np.pi/2,3*np.pi/4], levels=256, normed=True, symmetric=True)
    return coMatrix

In [6]:
def GetTextureFeature(coMatrix,feature):
    featurevalues = greycoprops(coMatrix, feature)
    return featurevalues

In [7]:
def GetEntropy(coMatrix):
    I,J,D,A = coMatrix.shape
    shannon = np.zeros((D,A))
    for d in range(0,D):
        for a in range (0,A):
            shannon[d,a] = skimage.measure.shannon_entropy(coMatrix[:,:,d,a])
    return shannon

# Load Data

In [8]:
def files_within(directory_path, pattern="*"):
    for dirpath, dirnames, filenames in os.walk(directory_path):
        for file_name in fnmatch.filter(filenames, pattern):
            yield os.path.join(dirpath, file_name)

In [9]:
def LoadImages(path,ImgsType):
    imgs = []
    files = list(files_within(path,"*.jpg"))
    for file in files:
        #print(file)
        if ImgsType not in file: 
            continue
        img = Image.open(file)
        #img = img.resize((1024, 1024))
        img = np.array(img)
        #print(file," ",img.shape)
        imgs.append(img)
    #imgs = np.array(imgs)
    print(len(imgs))
    return imgs

# Create Data Set

### Selected Features are:
* All Ranges Mean Value
* All Range Var
* All Ranges Skew
* All Ranges kurtosis
* Red Ranges Mean Value
* Red Range Var
* Red Ranges Skew
* Red Ranges kurtosis
* Green Ranges Mean Value
* Green Range Var
* Green Ranges Skew
* Green Ranges kurtosis
* Blue Ranges Mean Value
* Blue Range var
* Blue Ranges Skew
* Blue Ranges kurtosis
* Gray Ranges Mean Value
* Gray Range var
* Gray Ranges Skew
* Gray Ranges kurtosis
-------------------
* Black Ratio
* White Ratio
* Origional Entropy
* Contrast Effect (4)
* Dissimilarity Effect (4)
* Homogeneity (4)
* Energy (4)
* Correlation (4)
* Entropy (4)

In [10]:
columns = ["Amean","Avar","ASkew", "AKurt",
           "Rmean","Rvar","RSkew", "RKurt",
           "Gmean","Gvar","GSkew", "GKurt",
           "Bmean","Bvar","BSkew", "BKurt",
           "Graymean","Grayvar","GraySkew", "GrayKurt",
           "BlackRatio","WhiteRatio",
           "Entropy",
           "Contrast0","Contrast45","Contrast90","Contrast135",
           "Dissimilarity0","Dissimilarity45","Dissimilarity20","Dissimilarity2PI/2",
           "Homogeneity0","Homogeneity45","Homogeneity90","Homogeneity135",
           "Energy0","Energy45","Energy20","Energy135",
           "Correlation0","Correlation45","Correlation90","Correlation135",
           "Entropy0","Entropy45","Entropy90","Entropy135"
          ]

In [11]:
def allWork(path,ImgsType, removeblack = True):
    imgs = LoadImages(path,ImgsType)
    data = []
    for i in range(0,len(imgs)):
        Astats = getStats(imgs[i],removeblack)
        Rstats = getStats(imgs[i][:,:,0],removeblack)
        Gstats = getStats(imgs[i][:,:,1],removeblack)
        Bstats = getStats(imgs[i][:,:,2],removeblack)
        Gray = color.rgb2gray(imgs[i])*255 #* 255 # * 255 due to scale issues (0-1) -> (0->255)
        Graystats = getStats(Gray,removeblack)
        
        #black and white ratios and entropy
        [b,w] = getBlackandWhiteCount(toBitMap(Gray))
        origionalentropy = skimage.measure.shannon_entropy(Gray)
        
        #texture features
        coMatrix = GetCoMatrix(Gray)
        contrast = GetTextureFeature(coMatrix,'contrast')
        dissimilarity = GetTextureFeature(coMatrix,'dissimilarity')
        homogeneity = GetTextureFeature(coMatrix,'homogeneity')
        energy = GetTextureFeature(coMatrix,'energy')
        correlation = GetTextureFeature(coMatrix,'correlation')
        entropy = GetEntropy(coMatrix)
    
        data.append([ Astats[0],Astats[1],Astats[2],Astats[3],  
                     Rstats[0],Rstats[1],Rstats[2],Rstats[3], 
                     Gstats[0],Gstats[1],Gstats[2],Gstats[3], 
                     Bstats[0],Bstats[1],Bstats[2],Bstats[3],
                     Graystats[0],Graystats[1],Graystats[2],Graystats[3],
                     b, w,
                     origionalentropy,
                     contrast[0][0], contrast[0][1], contrast[0][2],contrast[0][3],
                     dissimilarity[0][0],dissimilarity[0][1],dissimilarity[0][2],dissimilarity[0][3],
                     homogeneity[0][0],homogeneity[0][1],homogeneity[0][2],homogeneity[0][3],
                     energy[0][0],energy[0][1],energy[0][2],energy[0][3],
                     correlation[0][0],correlation[0][1],correlation[0][2],correlation[0][3],
                     entropy[0][0],entropy[0][1],entropy[0][2],entropy[0][3]
                    ])

    numpyarray = np.array(data)
    df = pd.DataFrame(numpyarray, columns = columns)
    df.to_csv(path+ImgsType+'.csv',index = False)
    return df

In [12]:
path = "."
removeblack = False
df = allWork(path,"Solar Data",removeblack)

6


NameError: name 'getStats' is not defined

In [13]:
#dfClean.head()

In [14]:
#df.dtypes

In [15]:
plt.figure(figsize=(10,10))
X = "Correlation10"
Y = "Energy10"
plt.scatter(dfClean[X],dfClean[Y],color='blue')
plt.scatter(dfMid[X],dfMid[Y],color='yellow')
plt.scatter(dfDirty[X],dfDirty[Y],color='red')

KeyError: 'Correlation10'

<Figure size 720x720 with 0 Axes>

# PlayGround