In [60]:
import numpy as np
import cv2
import pandas as pd
from skimage.filters import roberts, sobel, scharr, prewitt
from scipy import ndimage as nd
from sklearn.ensemble import RandomForestClassifier
import rasterio as rio
from sklearn.model_selection import train_test_split

In [9]:
img = cv2.imread('../../data/data/ortho_train.tif')

In [3]:
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

In [17]:
img.shape

(7068, 5160, 3)

In [47]:
imgB = img[:, :, 0]
imgG = img[:, :, 1]
imgR = img[:, :, 2]

In [48]:
imgB1 = imgB.reshape(-1)
imgG1 = imgG.reshape(-1)
imgR1 = imgR.reshape(-1)

In [36]:
df = pd.DataFrame()

In [37]:
df['original_imgB'] = imgB1
df['original_imgG'] = imgG1
df['original_imgR'] = imgR1

In [38]:
def Gabor_features(df, img, label):
    num = 1  #To count numbers up in order to give Gabor features a lable in the data frame
    kernels = []  #Create empty list to hold all kernels that we will generate in a loop
    for theta in range(2):   #Define number of thetas. Here only 2 theta values 0 and 1/4 . pi 
        theta = theta / 4. * np.pi
        for sigma in (1, 3):  #Sigma with values of 1 and 3
            for lamda in np.arange(0, np.pi, np.pi / 4):   #Range of wavelengths
                for gamma in (0.05, 0.5):   #Gamma values of 0.05 and 0.5

                    gabor_label = f'Gabor_{label}' + str(num)  #Label Gabor columns as Gabor1, Gabor2, etc.
                    print(gabor_label)
                    ksize=9
                    kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, 0, ktype=cv2.CV_32F)    
                    kernels.append(kernel)
                    #Now filter the image and add values to a new column 
                    fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
                    filtered_img = fimg.reshape(-1)
                    df[gabor_label] = filtered_img  #Labels columns as Gabor1, Gabor2, etc.
                    print(gabor_label, ': theta=', theta, ': sigma=', sigma, ': lamda=', lamda, ': gamma=', gamma)
                    num += 1  #Increment for gabor column label
    return df

In [41]:
for ind, val in enumerate([imgB1, imgG1, imgR1]):
    labels = ['B', 'G' , 'R']
    df = Gabor_features(df, val, labels[ind])

Gabor_B1
Gabor_B1 : theta= 0.0 : sigma= 1 : lamda= 0.0 : gamma= 0.05
Gabor_B2
Gabor_B2 : theta= 0.0 : sigma= 1 : lamda= 0.0 : gamma= 0.5
Gabor_B3
Gabor_B3 : theta= 0.0 : sigma= 1 : lamda= 0.7853981633974483 : gamma= 0.05
Gabor_B4
Gabor_B4 : theta= 0.0 : sigma= 1 : lamda= 0.7853981633974483 : gamma= 0.5
Gabor_B5
Gabor_B5 : theta= 0.0 : sigma= 1 : lamda= 1.5707963267948966 : gamma= 0.05
Gabor_B6
Gabor_B6 : theta= 0.0 : sigma= 1 : lamda= 1.5707963267948966 : gamma= 0.5
Gabor_B7
Gabor_B7 : theta= 0.0 : sigma= 1 : lamda= 2.356194490192345 : gamma= 0.05
Gabor_B8
Gabor_B8 : theta= 0.0 : sigma= 1 : lamda= 2.356194490192345 : gamma= 0.5
Gabor_B9
Gabor_B9 : theta= 0.0 : sigma= 3 : lamda= 0.0 : gamma= 0.05
Gabor_B10
Gabor_B10 : theta= 0.0 : sigma= 3 : lamda= 0.0 : gamma= 0.5
Gabor_B11
Gabor_B11 : theta= 0.0 : sigma= 3 : lamda= 0.7853981633974483 : gamma= 0.05
Gabor_B12
Gabor_B12 : theta= 0.0 : sigma= 3 : lamda= 0.7853981633974483 : gamma= 0.5
Gabor_B13
Gabor_B13 : theta= 0.0 : sigma= 3 : lamda= 1

Gabor_R30 : theta= 0.7853981633974483 : sigma= 3 : lamda= 1.5707963267948966 : gamma= 0.5
Gabor_R31
Gabor_R31 : theta= 0.7853981633974483 : sigma= 3 : lamda= 2.356194490192345 : gamma= 0.05
Gabor_R32
Gabor_R32 : theta= 0.7853981633974483 : sigma= 3 : lamda= 2.356194490192345 : gamma= 0.5


In [44]:
df.head()

Unnamed: 0,original_imgB,original_imgG,original_imgR,Gabor_B1,Gabor_B2,Gabor_B3,Gabor_B4,Gabor_B5,Gabor_B6,Gabor_B7,...,Gabor_R23,Gabor_R24,Gabor_R25,Gabor_R26,Gabor_R27,Gabor_R28,Gabor_R29,Gabor_R30,Gabor_R31,Gabor_R32
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [46]:
def extract_features(df, img, label):
    
    #Canny Edge
    edges = cv2.Canny(img, 100,200)   #Image, min and max values
    edges1 = edges.reshape(-1)
    df[f'Canny Edge_{label}'] = edges1 #Add column to original dataframe
    
    #ROBERTS EDGE
    edge_roberts = roberts(img)
    edge_roberts1 = edge_roberts.reshape(-1)
    df[f'Roberts_{label}'] = edge_roberts1

    #SOBEL
    edge_sobel = sobel(img)
    edge_sobel1 = edge_sobel.reshape(-1)
    df[f'Sobel_{label}'] = edge_sobel1

    #SCHARR
    edge_scharr = scharr(img)
    edge_scharr1 = edge_scharr.reshape(-1)
    df[f'Scharr_{label}'] = edge_scharr1

    #PREWITT
    edge_prewitt = prewitt(img)
    edge_prewitt1 = edge_prewitt.reshape(-1)
    df[f'Prewitt_{label}'] = edge_prewitt1

    #GAUSSIAN with sigma=3
    gaussian_img = nd.gaussian_filter(img, sigma=3)
    gaussian_img1 = gaussian_img.reshape(-1)
    df[f'Gaussian_s3_{label}'] = gaussian_img1

    #GAUSSIAN with sigma=7
    gaussian_img2 = nd.gaussian_filter(img, sigma=7)
    gaussian_img3 = gaussian_img2.reshape(-1)
    df[f'Gaussian_s7_{label}'] = gaussian_img3

    #MEDIAN with sigma=3
    median_img = nd.median_filter(img, size=3)
    median_img1 = median_img.reshape(-1)
    df[f'Median_s3_{label}'] = median_img1

    #VARIANCE with size=3
    variance_img = nd.generic_filter(img, np.var, size=3)
    variance_img1 = variance_img.reshape(-1)
    df[f'Variance_s3_{label}'] = variance_img1  #Add column to original dataframe
    
    return df

In [49]:
for ind, val in enumerate([imgB, imgG, imgR]):
    labels = ['B', 'G' , 'R']
    df = extract_features(df, val, labels[ind])

In [54]:
with rio.open("../../data/data/tree_mask_train.tif") as tm:
    labeled_img = tm.read(1)

In [55]:
labeled_img

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [65]:
labeled_img1 = labeled_img.reshape(-1)
df['Labels'] = labeled_img1

In [57]:
df.dropna(inplace = True)

In [66]:
df['Labels']

0           0.0
1           0.0
2           0.0
3           0.0
4           0.0
           ... 
36470875    0.0
36470876    0.0
36470877    0.0
36470878    0.0
36470879    0.0
Name: Labels, Length: 36470880, dtype: float32

In [62]:
df.columns

Index(['original_imgB', 'original_imgG', 'original_imgR', 'Gabor_B1',
       'Gabor_B2', 'Gabor_B3', 'Gabor_B4', 'Gabor_B5', 'Gabor_B6', 'Gabor_B7',
       ...
       'Variance_s3_G', 'Canny Edge_R', 'Roberts_R', 'Sobel_R', 'Scharr_R',
       'Prewitt_R', 'Gaussian_s3_R', 'Gaussian_s7_R', 'Median_s3_R',
       'Variance_s3_R'],
      dtype='object', length=126)

In [61]:
X_train, X_test, y_train, y_test = train_test_split(df.drop(labels = ['Labels'], axis=1, inplace = True), df[['Labels']])

KeyError: "None of [Index(['Labels'], dtype='object')] are in the [columns]"