## 2024 07/30 AutoML - Hawkeye Hands

*Last Updated*: 2024-07-14

### Authors
* Nicole Tin


### Overview
This Jupyter notebook is intended to demonstrate ...


### Key Results

- ...

In [1]:
# --- Imports
import os
import cv2
import pandas as pd
from PIL import Image
from dermaml import features
from tqdm import tqdm

from pycaret import regression

# import mlflow # creates error
import matplotlib.pyplot as plt


In [2]:
# Dataset
dataset_name = "hawkeye-hands"

# AutoML
experiment_name = "NT_hawkeye-hands-texture-fraction"
num_best_models = 5
random_seed = 42

# Paths
root = '/Users/nicole/Documents/DermaML_local/hawkeye-hands-2024-07-29'
# src_dir = '/Users/nicole/Documents/DermaML_local/hawkeye-hands-2024-07-29/processed_images/'
features_dir =  '/Users/nicole/Documents/DermaML_local/hawkeye-hands-2024-07-29/features-2024-08-02/'

image_folder = '/processed_images/'
csv_file = '/metadata.csv'

In [65]:
# -- Read the CSV file
metadata = pd.read_csv(root+ csv_file)
metadata.loc[:, 'Age'] = 2024-metadata['birth_year']
valid_image_fnames_df = pd.DataFrame(metadata.set_index('Age').loc[:, ['right_hand_image_file', 'left_hand_image_file']].stack()).reset_index()
valid_image_fnames_df.columns = ['Age', 'handedness', 'filename']
valid_image_fnames = valid_image_fnames_df['filename'].to_numpy()

In [31]:
f_dermaml = pd.read_csv(features_dir+'2024-08-02_hawkeye-hands_dermaml-features.csv').drop(columns='Unnamed: 0')
f_pyfeats = pd.read_csv(features_dir+'2024-08-02_hawkeye-hands_pyfeats_textures.csv').drop(columns='Unnamed: 0')
f_redness = pd.read_csv(features_dir+'2024-08-02_hawkeye-hands-redness-features.csv').drop(columns='Unnamed: 0')
f_hessian = pd.read_csv(features_dir+'2024-08-02_hawkeye-hands-hessian-features.csv').drop(columns='Unnamed: 0')

X = f_redness.merge(f_hessian).merge(f_pyfeats).merge(f_dermaml)
# X.loc[:, 'Age'] = metadata[X['filename']]

In [73]:
valid_image_fnames_df.set_index('filename')

Unnamed: 0_level_0,Age,handedness
filename,Unnamed: 1_level_1,Unnamed: 2_level_1
67a6ac99-3bb9-44d9-bb5c-b2cbb5bca1a8.jpeg,26,right_hand_image_file
85e5965e-adff-4632-a9b9-3dbad1af9f39.jpeg,26,left_hand_image_file
4ae14bdf-a3c6-4aea-97c4-1122cd30d36f.jpeg,94,right_hand_image_file
16b45eff-7f1d-4437-b8fa-c7d4adbf4f92.jpeg,94,left_hand_image_file
49a1cb3b-2b96-4fd1-b3b1-628fb3b04536.jpeg,69,right_hand_image_file
...,...,...
f5dc7f18-b316-4279-96a9-c63e75e25b9a.jpeg,55,left_hand_image_file
37c2116c-9c15-4160-aa6c-e391923b037f.jpeg,18,right_hand_image_file
a4d6d145-8944-4fc7-99ff-51e8fda18a72.jpeg,18,left_hand_image_file
dcaef183-bb9c-4cda-b611-10d5050dfb5d.jpeg,18,right_hand_image_file


In [78]:
# X.set_index('filename').join(valid_image_fnames_df.set_index('filename'), on=['filename',], how='inner')
X.join(valid_image_fnames_df.set_index('filename'), on='filename', )

Unnamed: 0,relative_redness_mean,relative_redness_std,filename,GLCM_ASM_Mean_wrinkles_pyfeats,GLCM_Contrast_Mean_wrinkles_pyfeats,GLCM_Correlation_Mean_wrinkles_pyfeats,GLCM_SumOfSquaresVariance_Mean_wrinkles_pyfeats,GLCM_InverseDifferenceMoment_Mean_wrinkles_pyfeats,GLCM_SumAverage_Mean_wrinkles_pyfeats,GLCM_SumVariance_Mean_wrinkles_pyfeats,...,lbp_7,lbp_8,lbp_9,lbp_10,contrast_scikit,correlation_scikit,energy_scikit,homogeneity_scikit,Age,handedness
0,49.527017,87.354849,abc1df28-c46d-4d73-8892-a454d3bfc732.png,0.004391,59.059864,0.977429,1319.058779,0.464867,169.195511,5217.175252,...,0.014973,0.024414,0.755650,0.086060,235.026281,0.936256,0.724958,0.817446,,
1,53.417189,87.489842,558fe960-b067-4b76-90c5-f5a688e92458.png,0.003148,61.927220,0.972034,1116.904646,0.433753,170.879752,4405.691363,...,0.017398,0.025351,0.727272,0.089151,216.853706,0.943964,0.697230,0.785236,,
2,34.493744,56.946444,c11f1b7c-ce6e-4536-884c-256857c0dbe5.png,0.001001,137.018388,0.953948,1502.735875,0.255450,138.024718,5873.925111,...,0.018369,0.025615,0.714607,0.085818,340.831075,0.891314,0.686850,0.730223,,
3,59.869464,92.993415,d189c101-8afd-417f-8721-e235604ac7dd.png,0.003308,62.043170,0.963496,856.333432,0.419310,181.293609,3363.290557,...,0.019207,0.028030,0.706417,0.097868,289.337202,0.932467,0.673153,0.760343,,
4,47.553003,90.072842,299c1395-10d3-4388-a6b9-c0f9b6997847.png,0.004333,58.984742,0.964571,844.492495,0.515370,198.124721,3318.985240,...,0.016095,0.018738,0.779695,0.059406,194.017522,0.952759,0.757498,0.831494,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
571,59.869776,95.706273,93e2941a-f8fb-48e9-84ce-3634a7c19b24.png,0.006371,52.444514,0.967231,811.485694,0.540003,196.437295,3193.498261,...,0.019218,0.026581,0.723424,0.087770,212.034612,0.955335,0.689446,0.808613,,
572,46.272455,80.696601,4d1c8765-0d56-458c-82bd-83108056fdff.png,0.005604,69.616667,0.982084,1954.368942,0.594853,111.353090,7747.859099,...,0.019278,0.022123,0.758169,0.062155,270.749493,0.889693,0.722553,0.829624,,
573,67.569639,92.524804,6105a6d5-5f85-4bce-b5e0-ac07b9efb8f3.png,0.003386,51.516288,0.968389,820.963336,0.425271,178.344406,3232.337056,...,0.024153,0.032903,0.647135,0.110408,323.990450,0.928679,0.609239,0.708786,,
574,50.375913,71.450351,6e51e138-3413-4530-9c99-02caec69b5b1.png,0.002095,70.838354,0.983294,2131.072997,0.335324,155.855864,8453.453636,...,0.022512,0.029669,0.650991,0.095755,1052.181160,0.773463,0.612143,0.677742,,


In [84]:
set(valid_image_fnames).intersection(set(X['filename']))

set()

In [80]:
any(X.loc[:,'filename'].isin(valid_image_fnames))

False

In [None]:
# --- Perform AutoML Evaluation

# Set up the dataset for AutoML regression
regression.setup(data=X,
                 target="age",
                 log_experiment=True,
                 experiment_name=experiment_name,
                 session_id=random_seed,
                ) 

best_models = regression.compare_models(n_select=num_best_models, verbose=False)


Unnamed: 0,Description,Value
0,Session id,42
1,Target,age
2,Target type,Regression
3,Original data shape,"(189, 17)"
4,Transformed data shape,"(189, 20)"
5,Transformed train set shape,"(132, 20)"
6,Transformed test set shape,"(57, 20)"
7,Numeric features,15
8,Categorical features,1
9,Preprocess,True


In [None]:
regression.pull()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
huber,Huber Regressor,1.5529,25.7337,3.3507,-0.1169,0.1075,0.053,0.082
dummy,Dummy Regressor,1.7976,25.0128,3.4401,-0.6188,0.1119,0.0663,0.048
br,Bayesian Ridge,1.7972,25.0686,3.4444,-0.6221,0.1121,0.0662,0.049
lasso,Lasso Regression,1.8018,25.2983,3.4656,-0.651,0.113,0.0663,0.037
llar,Lasso Least Angle Regression,1.8018,25.2983,3.4656,-0.651,0.113,0.0663,0.041
en,Elastic Net,1.8009,25.3173,3.4667,-0.6512,0.1131,0.0663,0.042
omp,Orthogonal Matching Pursuit,1.7996,25.3353,3.4674,-0.6513,0.1131,0.0662,0.046
ridge,Ridge Regression,1.9516,27.8215,3.9106,-1.7525,0.1298,0.0729,0.058
lightgbm,Light Gradient Boosting Machine,2.1448,26.1486,3.8986,-2.1757,0.131,0.0828,0.478
et,Extra Trees Regressor,1.92,28.455,3.9347,-2.4186,0.1276,0.0727,0.212


In [None]:
tuned_models = [regression.tune_model(model, optimize='RMSE') for model in best_models]
ensem_models = [regression.ensemble_model(model, n_estimators=5, optimize='RMSE') for model in tuned_models]
tuned_blend = [regression.blend_models(model, optimize='RMSE') for model in tuned_models]
ensem_blend = [regression.blend_models(model, optimize='RMSE') for model in ensem_models]
model = regression.automl(optimize='RMSE')

In [None]:
regression.predict_model(model)

In [None]:
# final_model = regression.finalize_model(model)
# regression.save_model(final_model, 'hawkeye-hands-texture-fraction')

In [None]:
saved_modle = regression.load_model('hawkeye-hands-texture-fraction')