## 2024-06-14 AutoML Test

*Last Updated*: 2024-06-14

### Authors
* Nicole Tin (...)


### Overview
This Jupyter notebook is intended to demonstrate:

* A...


### Key Results

- E...

In [1]:
from pycaret import regression

In [2]:
# --- Imports
# External packages
import os
import cv2
from pycaret import regression

from dermaml import data
from dermaml import features
import PIL
from PIL import Image
import numpy as np
# import mlflow # creates error
import matplotlib.pyplot as plt
import pandas as pd
import skimage


In [3]:
# --- Configuration
# Dataset
dataset_name = "11khands"

# AutoML
experiment_name = "11khands-automl-sample-test_NT"
num_best_models = 5   # how many top-ranked models compare_models should return
random_seed = 42      # passed to PyCaret as the session id

# Paths
# The repository root can be overridden with the DERMAML_ROOT environment
# variable so the notebook is not tied to one machine; when the variable is
# unset (or empty) the original location is used.
root = os.environ.get('DERMAML_ROOT') or '/Users/nicole/Documents/GitHub/DermaML'
# Both sub-paths keep their leading '/' because later cells build full paths
# by plain string concatenation (root + image_folder, root + csv_file).
image_folder = '/data/source/Hands/'
csv_file = '/data/source/HandInfo.csv'

In [4]:
# Load the hand metadata table, then keep only the rows whose
# 'aspectOfHand' value is exactly 'dorsal right'.
hand_info_path = root + csv_file
df = pd.read_csv(hand_info_path)

is_dorsal_right = df['aspectOfHand'].eq('dorsal right')
dorsals = df.loc[is_dorsal_right]

In [5]:
# Images

# ds = hub.load('hub://activeloop/11k-hands')
# images = ds.images

# image_files = os.listdir(root + image_folder)

# Load every image named in the dorsal-right metadata rows.
# `images` and `image_files` are filled in lockstep so that position i in one
# always refers to position i in the other, even when a file is skipped.
image_dir = root + image_folder

images = []
image_files = []
for filename in dorsals.loc[:, 'imageName']:
    img = cv2.imread(os.path.join(image_dir, filename))
    # cv2.imread returns None when a file cannot be read, so the None test has
    # to short-circuit (`and`, not bitwise `&`) before img.shape is touched.
    if img is not None and len(img.shape) > 2:
        images.append(img)
        image_files.append(filename)

In [6]:
# # Image preprocessing
# image_files = dorsals.loc[:, 'imageName']
# removed_ims = []

# for im in images:
#     output = data.remove(im) # uses CoreML, takes too long to run
#     removed_ims.append(output)

# for i, im in enumerate(removed_ims):
#     # Convert BGRA image to RGBA
#     grayscale_im = cv2.cvtColor(im, cv2.COLOR_BGRA2RGBA)

In [7]:
# Compute GLCM texture features for each loaded image and record them
# together with the image's file name.

# The images and their file names are paired purely by position. zip() stops
# silently at the shorter input, so a length mismatch would attach features to
# the wrong file names; fail loudly instead.
if len(images) != len(image_files):
    raise ValueError(
        f"{len(images)} images but {len(image_files)} file names; "
        "the two sequences must correspond one-to-one"
    )

glcm_features = []

for im, filename in zip(images, image_files):
    contrast, correlation, energy, homogeneity = features.compute_glcm(im)

    # Keep only the [0, 0] entry of each returned array
    # (presumably the first distance/angle combination; TODO confirm against
    # dermaml.features.compute_glcm).
    glcm_features.append({
        'Filename': filename,
        'Contrast': contrast[0, 0],
        'Correlation': correlation[0, 0],
        'Energy': energy[0, 0],
        'Homogeneity': homogeneity[0, 0]
    })

In [8]:
# GLCM dataframe

# One row per image, indexed by its file name.
glcm_df = pd.DataFrame(glcm_features).set_index('Filename')

# Join the texture features onto the hand metadata. The key is called
# 'imageName' in the metadata and 'Filename' in the GLCM table; the inner join
# keeps only rows present in both.
dorsal_texture = dorsals.merge(
    glcm_df,
    how='inner',
    left_on='imageName',
    right_on='Filename',
)

# Save the merged DataFrame back to the CSV file
# dorsal_texture.to_csv(root+'/data/Dorsal_Texture_test.csv', index=False)

In [13]:
# --- Perform AutoML Evaluation

# Modelling table: the target column ('age') followed by the four GLCM
# texture columns.
texture_columns = ['Contrast', 'Correlation', 'Energy', 'Homogeneity']
X = dorsal_texture[['age', *texture_columns]]

# Configure the PyCaret regression experiment on that table.
setup_options = dict(
    data=X,
    target="age",
    log_experiment=True,
    experiment_name=experiment_name,
    session_id=random_seed,
)
regression.setup(**setup_options)

# Compare the candidate models and keep the `num_best_models` top-ranked ones.
best_models = regression.compare_models(
    n_select=num_best_models,
    verbose=False,
)


Unnamed: 0,Description,Value
0,Session id,42
1,Target,age
2,Target type,Regression
3,Original data shape,"(2892, 5)"
4,Transformed data shape,"(2892, 5)"
5,Transformed train set shape,"(2024, 5)"
6,Transformed test set shape,"(868, 5)"
7,Numeric features,4
8,Preprocess,True
9,Imputation type,simple


In [14]:
# Display the list of models returned by compare_models.
best_models

[ExtraTreesRegressor(n_jobs=-1, random_state=42),
 LGBMRegressor(n_jobs=-1, random_state=42),
 RandomForestRegressor(n_jobs=-1, random_state=42),
 GradientBoostingRegressor(random_state=42),
 BayesianRidge()]

In [12]:
# Retrieve PyCaret's most recent results table; the saved output is the
# model-comparison grid.
# NOTE(review): this cell's execution count (12) is lower than that of the
# setup/compare cell (13), so the saved output may come from an earlier run;
# re-run the notebook top to bottom to confirm.
regression.pull()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
et,Extra Trees Regressor,1.6796,26.5511,4.9812,0.5075,0.1257,0.0592,0.442
lightgbm,Light Gradient Boosting Machine,2.2876,31.3271,5.5111,0.391,0.1522,0.0852,2.822
rf,Random Forest Regressor,1.8933,32.8256,5.5687,0.3716,0.1423,0.0667,0.813
gbr,Gradient Boosting Regressor,2.1245,35.8862,5.9222,0.3027,0.1547,0.0755,0.542
br,Bayesian Ridge,2.9591,48.3567,6.8657,0.1037,0.1781,0.1061,0.032
lr,Linear Regression,2.9797,48.3532,6.8662,0.1033,0.1787,0.1071,2.159
ridge,Ridge Regression,2.7432,50.3402,6.9961,0.0724,0.1771,0.0952,0.018
lasso,Lasso Regression,2.7762,51.6847,7.0873,0.0488,0.1803,0.0962,0.017
llar,Lasso Least Angle Regression,2.7762,51.6847,7.0873,0.0488,0.1803,0.0962,0.032
en,Elastic Net,2.7813,51.6843,7.0875,0.0487,0.1804,0.0964,0.02
