## 2024-07-29 Feature Engineering

*Last Updated*: 2024-07-29

### Authors
* Nicole Tin


### Overview
An attempt to characterize wrinkles and roughness, and to explore the texture maps produced as output, in hand images.

### Key Results
- ...

### Continuing Thoughts 
- ...

In [1]:
# -- Imports
import os
import cv2
import pandas as pd
from dermaml import features
from dermaml import image

# from dermaml import data
# import PIL
# from PIL import Image
# import mlflow # creates error
import matplotlib.pyplot as plt
# import skimage

# -- Feature Eng Imports
import numpy as np
import pyfeats
from skimage.feature import hessian_matrix, hessian_matrix_eigvals



In [2]:
# Dataset
dataset_name = "11khands"

# AutoML
experiment_name = "11khands-automl-sample-test_NT"
num_best_models = 5
random_seed = 42

# Paths
# The repository root defaults to the original author's checkout; set the
# DERMAML_ROOT environment variable to run the notebook on another machine.
root = os.environ.get('DERMAML_ROOT', '/Users/nicole/Documents/GitHub/DermaML')
# The two paths below are appended to `root` by plain string concatenation
# further down, so they must keep their leading '/'.
image_folder = '/data/source/Hands/'
csv_file = '/data/source/HandInfo.csv'

In [5]:
# -- Load the hand metadata table and keep only the 'dorsal right' rows
df = pd.read_csv(root + csv_file)
dorsals = df[df['aspectOfHand'].eq('dorsal right')]
# dorsals = dorsals.drop_duplicates(subset=['id'])

## -- subset images further: 3 of each skin color
# dorsals = dorsals.groupby('skinColor').tail(3)

In [7]:
# -- Read Images

def read_local(image_fnames, image_dir):
    '''
    Load the named image files from a directory with OpenCV.

    Files for which `cv2.imread` returns `None`, and arrays with fewer than
    three dimensions, are skipped, so the result can be shorter than
    `image_fnames`.

    Arguments
    ---------
    `image_fnames`: iterable of file names, each joined onto `image_dir`
    `image_dir`: directory that contains the image files

    Returns
    -------
    A list of the loaded image arrays, in the order of `image_fnames`.
    '''
    images = []
    for filename in image_fnames:
        img = cv2.imread(os.path.join(image_dir, filename))
        # `and` short-circuits, so `img.shape` is never evaluated when the
        # read failed; the previous bitwise `&` evaluated both operands and
        # raised AttributeError on `None`.
        if img is not None and len(img.shape) > 2:
            images.append(img)
    return images

# ds = hub.load('hub://activeloop/11k-hands')
# images = ds.images

# File names come from the metadata rows; the image directory is `root`
# with `image_folder` appended.
image_fnames = dorsals['imageName']
images = read_local(image_fnames=image_fnames, image_dir=root + image_folder)

In [8]:
# -- Image Preprocessing (brightness)

def remove_brightness(image):
    '''
    Convert an image to HSV/HSB and zero out its 'value' (brightness) channel.

    Arguments
    ---------
    `image`: a 3-dimensional numpy array holding an RGB image

    Returns
    -------
    The result of the RGB -> HSV conversion with channel index 2 set to 0;
    the other two channels are left as `cv2.cvtColor` produced them.
    '''
    n_dims = len(image.shape)
    assert n_dims == 3
    without_value = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
    # Channel 2 of an HSV image is V, the brightness component.
    without_value[..., 2] = 0
    return without_value


def detect_ridges(gray, sigma=1.0):
    '''
    Compute the Hessian eigenvalue maps of an image.

    Arguments
    ---------
    `gray`: image array passed straight to `hessian_matrix`
            (assumed 2-D, since exactly two eigenvalue maps are unpacked)
    `sigma`: forwarded to `hessian_matrix` as its `sigma` argument

    Returns
    -------
    A 2-tuple with the first and second entries produced by
    `hessian_matrix_eigvals`, in that order.
    '''
    hessian_elements = hessian_matrix(gray, sigma=sigma)
    eigenvalues = hessian_matrix_eigvals(hessian_elements)
    first_eigvals, second_eigvals = eigenvalues
    return first_eigvals, second_eigvals

# iso_images = [image.remove_background(im) for im in images]
# hs_images = [remove_brightness(im) for im in images]

In [9]:
## Feature Computations

# Image transformations
ind = 0  # NOTE(review): not referenced in any visible cell; presumably a leftover index for inspecting a single image

def _engineer_features(img):
    
    # original_image = image.remove_background(img)
    original_image = img
    hsv_image = remove_brightness(img)
    bw_image = hsv_image[:,:,1]
    mask = cv2.cvtColor(original_image, cv2.COLOR_RGBA2GRAY) != 0
    hessian_image_a, hessian_image_b = detect_ridges(bw_image) 
    red_channel = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)[:,:,0]

    engineered_features = {}

    # relative redness
    relative_red_mean, relative_red_std = np.mean(red_channel), np.std(red_channel)
    red_labels = ['relative_redness_mean', 'relative_redness_std']
    red_values = [relative_red_mean, relative_red_std]
    red_features = {k:v for k,v in zip(red_labels, red_values)}

    # lbp_
    lbp_hist,lbp = features.compute_lbp(hsv_image)
    enum_lbp = dict(enumerate(lbp_hist))
    lbp_features = {'lbp_'+str(k):v for k,v in enum_lbp.items()}


    # glcm_whole_image
    contrast, correlation, energy, homogeneity = features.compute_glcm(hsv_image)
    glcm_labels = ['contrast', 'correlation', 'energy', 'homogeneity']
    glcm_values = [contrast[0][0], correlation[0][0], energy[0][0], homogeneity[0][0]]
    glcm_scikit_features = {str(k)+'_scikit':v for k,v in zip(glcm_labels, glcm_values)}

    # (pyfeats) glcm
    features_mean, features_range, labels_mean, labels_range = pyfeats.glcm_features(bw_image, ignore_zeros=True)
    glcm_pyfeats_features = {str(k)+'_pyfeats':v for k,v in zip(labels_mean, features_mean)}

    # glds
    glds_values, glds_labels = pyfeats.glds_features(bw_image, mask, Dx=[0,1,1,1], Dy=[1,1,0,-1])
    glds_features = {str(k)+'_glds':v for k,v in zip(glds_labels, glds_values)}


    # ngtdm
    ngtdm_values, ngtdm_labels = pyfeats.ngtdm_features(bw_image, mask, d=1)
    ngtdm_features = {str(k)+'_ngtdm':v for k,v in zip(ngtdm_labels, ngtdm_values)}

    # lte
    lte_values, lte_labels, = pyfeats.lte_measures(bw_image, mask,)
    lte_features = {str(k)+'_lte':v for k,v in zip(lte_labels, lte_values)}

    # unnormalized hessian ridges (wrinkles)
    a_lim = lambda a : np.mean(a) + 2*(np.std(a))
    mask_area = np.count_nonzero(mask)

    hessian_ridges = hessian_image_a >= a_lim(hessian_image_a)
    hessian_ridge_value = np.count_nonzero(hessian_ridges)
    hessian_values = [hessian_ridge_value, hessian_ridge_value/mask_area]
    hessian_label = ['skin_folds_hessian', 'skin_folds_hessian_pct_mask']
    hessian_features = {k:v for k,v in zip(hessian_label, hessian_values)}

    # feature engineering update
    engineered_features.update(red_features)
    engineered_features.update(lbp_features)
    engineered_features.update(glcm_scikit_features)
    engineered_features.update(glcm_pyfeats_features)
    engineered_features.update(glds_features)
    engineered_features.update(ngtdm_features)
    engineered_features.update(lte_features)
    engineered_features.update(hessian_features)

    return engineered_features



In [None]:
# Collect one feature dict per image. The list is filled inside an explicit
# loop, so whatever has been computed so far is still available if the run
# is interrupted.
img_features = []

for img in images:
    img_features.append(_engineer_features(img))

In [10]:
# NOTE: `df` is rebound here from the raw metadata table to the feature table.
df = pd.DataFrame(img_features)

# Feature rows follow the order of `images` (and therefore of `dorsals`) but
# carry a fresh 0..n-1 index, whereas `dorsals` keeps its row labels from the
# CSV. Assigning the Series directly would align on those labels and attach
# the wrong ages (or NaN), so the ages are attached by position instead.
if len(images) != len(dorsals):
    # read_local silently skips unreadable files; once that happens the
    # i-th image no longer corresponds to the i-th metadata row.
    raise ValueError(
        f"{len(dorsals) - len(images)} image(s) were skipped while loading; "
        "feature rows cannot be matched to `dorsals` by position"
    )
# Slicing to len(df) also covers a feature run that stopped early and
# therefore only holds the first len(df) images.
df['age'] = dorsals.loc[:, 'age'].to_numpy()[:len(df)]

  H_elems = hessian_matrix(gray, sigma=sigma)


KeyboardInterrupt: 

In [None]:
# Write the feature table to a dated CSV; the path is relative, so the file
# lands in the current working directory.
file_11k_features = '2024-07-29_NT_11k-Hands-Features.csv'
df.to_csv(path_or_buf=file_11k_features)

In [11]:
# Display the feature table as the cell output.
df

Unnamed: 0,relative_redness_mean,relative_redness_std,lbp_0,lbp_1,lbp_2,lbp_3,lbp_4,lbp_5,lbp_6,lbp_7,...,NGTDM_Complexity_ngtdm,NGTDM_Strngth_ngtdm,LTE_LL_7_lte,LTE_EE_7_lte,LTE_SS_7_lte,LTE_LE_7_lte,LTE_ES_7_lte,LTE_LS_7_lte,skin_folds_hessian,skin_folds_hessian_pct_mask
0,20.023274,43.926335,0.018591,0.021087,0.017772,0.021851,0.034509,0.04266,0.02494,0.023831,...,127279.110247,1079731.0,164923.336,1503.75264,254.583944,11018.584643,588.319961,3280.491967,85322,0.225627
1,38.942323,78.363402,0.013616,0.01626,0.01792,0.025969,0.043764,0.055943,0.030334,0.028384,...,32298.079429,3998134.0,87020.64735,412.633134,57.151435,5438.696259,141.706925,1256.450691,48714,0.117642
2,22.745503,50.069519,0.01933,0.019286,0.012933,0.014019,0.022114,0.027611,0.016499,0.018029,...,105348.070008,1123310.0,133346.599618,1101.181091,211.259133,8409.454594,469.291179,2525.153158,89289,0.24558
3,26.738475,63.198768,0.009849,0.011809,0.01318,0.019649,0.035844,0.043171,0.023033,0.018365,...,48070.145541,2947392.0,162216.257133,887.25693,147.52476,8084.071615,344.843527,2430.136381,33419,0.089943
4,18.042694,40.219612,0.017557,0.017286,0.01144,0.011879,0.02037,0.022891,0.013755,0.014149,...,140108.880857,1078538.0,141210.873336,1502.617708,277.081195,9822.991939,634.422964,3244.811403,84215,0.231376
5,48.031938,89.685166,0.013698,0.015097,0.015623,0.020259,0.034156,0.03786,0.021193,0.017819,...,38346.791843,3496570.0,97184.832465,534.981997,75.814418,5668.602741,198.224509,1485.289857,37804,0.081705
6,37.946956,67.058024,0.022138,0.022539,0.014717,0.016389,0.026763,0.03021,0.018917,0.018928,...,92076.484789,1078403.0,123150.357248,1062.258567,209.782472,7455.724609,460.718199,2363.679316,82063,0.161979
7,25.196644,51.342237,0.020181,0.019094,0.011187,0.011623,0.020191,0.023425,0.013341,0.013854,...,129219.412122,910452.4,134393.427054,1381.382625,270.361153,10320.880884,593.285606,3165.589608,74062,0.177877
8,30.734789,54.677111,0.022468,0.022873,0.014967,0.015395,0.024441,0.027258,0.017718,0.017029,...,124921.846889,965903.1,149533.121854,1321.567243,251.463364,8550.844917,561.16015,2818.46712,100418,0.200978
9,21.843573,45.691185,0.018097,0.018064,0.011209,0.0114,0.018042,0.020184,0.012608,0.012922,...,126507.124911,919086.7,145045.116793,1400.474613,268.217151,9040.474365,602.321862,3113.304053,91192,0.230268
