## 2023 06/16 Multi Cropped Palm and LBP AutoML Experimentation

*Last Updated*: 2023-09-26

### Authors
* Hannah Zhang (hannahzhang@ucsb.edu)


### Overview
This Jupyter notebook is intended to demonstrate

* Mediapipe multi palm cropped images with lbp feature extraction results from automl tests


### Key Results

- Linear Regressor, Bayesian Ridge, and Light Gradient Boosting Machine performed the best
- Dummy Regressor did not perform well as compared to experiments with lbp features from single cropped palm -> more lbp features per hand leads to better results with automl

In [None]:
# --- Imports

# External packages
from pycaret import regression
from pycaret.datasets import get_data
import os
import cv2
from dermaml import data, features
import PIL
from PIL import Image
import numpy as np
import mlflow
import matplotlib.pyplot as plt
import pandas as pd
import skimage
import pandas as pd
import mediapipe as mp


In [2]:
dataset_name = "11khands"

experiment_name = "multi-cropped 11khands with lbp"

num_best_models = 5
random_seed = 345

In [3]:
# Dataset
dataset_name = "11khands"

# AutoML
experiment_name = "11khands-automl-sample-test"
num_best_models = 5
random_seed = 123  # seed used for random number generators to ensure reproducibility of results in this notebook

In [12]:
def load_image_paths(folder_path):
    image_paths = []
    for root, dirs, files in os.walk(folder_path):
        for file in files:
            if file.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff')):
                # Create the full path to the image file
                image_path = os.path.join(root, file)
                image_paths.append(image_path)
    return image_paths

# Example usage:
image_folder = '/Users/hannahzhang/Downloads/11khands_test_ims_2/'
image_paths = load_image_paths(image_folder)

# Print the list of image paths
print(image_paths)

['/Users/hannahzhang/Downloads/11khands_test_ims_2/Hand_0000003.jpg', '/Users/hannahzhang/Downloads/11khands_test_ims_2/Hand_0000002.jpg', '/Users/hannahzhang/Downloads/11khands_test_ims_2/Hand_0003093.jpg', '/Users/hannahzhang/Downloads/11khands_test_ims_2/Hand_0009409.jpg', '/Users/hannahzhang/Downloads/11khands_test_ims_2/Hand_0001045.jpg', '/Users/hannahzhang/Downloads/11khands_test_ims_2/Hand_0003097.jpg', '/Users/hannahzhang/Downloads/11khands_test_ims_2/Hand_0000005.jpg', '/Users/hannahzhang/Downloads/11khands_test_ims_2/Hand_0000004.jpg', '/Users/hannahzhang/Downloads/11khands_test_ims_2/Hand_0003096.jpg', '/Users/hannahzhang/Downloads/11khands_test_ims_2/Hand_0001046.jpg', '/Users/hannahzhang/Downloads/11khands_test_ims_2/Hand_0003094.jpg', '/Users/hannahzhang/Downloads/11khands_test_ims_2/Hand_0000006.jpg', '/Users/hannahzhang/Downloads/11khands_test_ims_2/Hand_0003095.jpg', '/Users/hannahzhang/Downloads/11khands_test_ims_2/Hand_0001047.jpg', '/Users/hannahzhang/Downloads/11k

In [14]:
results = []
for path in image_paths: 
    result = data.multi_crop_palm(path)
    if result is not None:
        results.append(result)

No hands detected in the image.
No hands detected in the image.
No hands detected in the image.
No hands detected in the image.
No hands detected in the image.
No hands detected in the image.
No hands detected in the image.
No hands detected in the image.


In [17]:
# Iterate through the list of dictionaries
for result in results:
    # Iterate through the dictionary to access image filenames and sub-dictionaries
    for image_filename, image_dict in result.items():
        # Iterate through the sub-dictionary containing the images
        for key, image in image_dict.items():
            # Check if the key is 'Image 9' or 'Image 10' to process images
            if key == 'Image 9' or key == 'Image 10':
                # Convert the image to grayscale
                grayscale_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

                # Update the sub-dictionary with the grayscale image
                image_dict[key] = grayscale_image

In [32]:
# Initialize an empty list to store LBP features
lbp_features = []

# Iterate through the list of dictionaries containing images
for image_dict in results:
    # Extract the image filenames
    image_filenames = list(image_dict.keys())
    
    # Iterate through the sub-dictionaries containing the images
    for image_filename, image_data in image_dict.items():
        # Extract the images from the sub-dictionary
        image_9 = image_data['Image 9']
        image_10 = image_data['Image 10']

        # Apply LBP feature extraction to images
        lbp_9 = features.extract_features(image_9)
        lbp_10 = features.extract_features(image_10)

        # Append the LBP features and filenames to the lbp_features list
        lbp_features.append({
            'landmark_9': lbp_9,
            'landmark_10': lbp_10,
            'filename': image_filename
        })


In [33]:
len(lbp_features)

47

In [34]:
print(lbp_features)

[{'landmark_9': {'texture': [0.07375346260387812, 0.07531163434903047, 0.07583102493074792, 0.09193213296398892, 0.10560941828254848, 0.11755540166204986, 0.06371191135734072, 0.0625, 0.05678670360110803, 0.06475069252077563, 0.21225761772853186]}, 'landmark_10': {'texture': [0.06111495844875346, 0.07323407202216066, 0.08275623268698061, 0.08916204986149584, 0.13867728531855955, 0.13988919667590027, 0.07617728531855955, 0.058691135734072024, 0.058344875346260386, 0.04587950138504155, 0.17607340720221606]}, 'filename': 'Hand_0000003.jpg'}, {'landmark_9': {'texture': [0.07409972299168975, 0.06440443213296398, 0.06492382271468145, 0.07583102493074792, 0.1331371191135734, 0.14958448753462603, 0.06128808864265928, 0.05851800554016621, 0.06128808864265928, 0.06007617728531856, 0.19684903047091412]}, 'landmark_10': {'texture': [0.057306094182825486, 0.06388504155124654, 0.07686980609418283, 0.09695290858725762, 0.12846260387811634, 0.14542936288088643, 0.0865650969529086, 0.06319252077562326,

In [44]:
# Create an empty DataFrame
lbp_df = pd.DataFrame()

# Iterate through the list of dictionaries and populate the DataFrame
for i, item in enumerate(lbp_features):
    filename = item['filename']
    landmark_9_values = item['landmark_9']['texture']
    landmark_10_values = item['landmark_10']['texture']

    # Create columns for landmark 9 values
    for j, value in enumerate(landmark_9_values):
        column_name = f'pattern {j + 1}'
        lbp_df.at[i, column_name] = value

    # Create columns for landmark 10 values
    for j, value in enumerate(landmark_10_values):
        column_name = f'pattern {j + 1 + len(landmark_9_values)}'
        lbp_df.at[i, column_name] = value

# Add the filename column
lbp_df['filename'] = lbp_features[0]['filename']

# Reorder the columns
column_order = ['filename'] + [f'pattern {i + 1}' for i in range(len(lbp_df.columns) - 1)]
lbp_df = lbp_df[column_order]

print(lbp_df)


            filename  pattern 1  pattern 2  pattern 3  pattern 4  pattern 5  \
0   Hand_0000003.jpg   0.073753   0.075312   0.075831   0.091932   0.105609   
1   Hand_0000003.jpg   0.074100   0.064404   0.064924   0.075831   0.133137   
2   Hand_0000003.jpg   0.062500   0.077043   0.077735   0.094356   0.133137   
3   Hand_0000003.jpg   0.088989   0.088123   0.068386   0.066136   0.080332   
4   Hand_0000003.jpg   0.091759   0.100762   0.059211   0.056267   0.067175   
5   Hand_0000003.jpg   0.066482   0.072022   0.068213   0.097472   0.128289   
6   Hand_0000003.jpg   0.086565   0.078947   0.067867   0.074792   0.115132   
7   Hand_0000003.jpg   0.076004   0.076697   0.072715   0.083795   0.123961   
8   Hand_0000003.jpg   0.056960   0.072022   0.079813   0.092625   0.144564   
9   Hand_0000003.jpg   0.076177   0.107341   0.071676   0.066136   0.091066   
10  Hand_0000003.jpg   0.060942   0.068040   0.070118   0.100069   0.150450   
11  Hand_0000003.jpg   0.078774   0.083622   0.07946

Load in CSV File

In [58]:
csv_file = '/Users/hannahzhang/Downloads/multi_cropped_lbp_automl.csv'

original_df = pd.read_csv(csv_file)

In [59]:
merged_df = pd.merge(original_df, lbp_df, left_on='imageName', right_on='filename', how='inner')

# Drop filename column

merged_df.drop('filename', axis=1, inplace=True)

# Display the resulting DataFrame
print(merged_df)


    age         imageName  pattern 1  pattern 2  pattern 3  pattern 4  \
0    27  Hand_0000003.jpg   0.073753   0.075312   0.075831   0.091932   
1    27  Hand_0000003.jpg   0.074100   0.064404   0.064924   0.075831   
2    27  Hand_0000003.jpg   0.062500   0.077043   0.077735   0.094356   
3    27  Hand_0000003.jpg   0.088989   0.088123   0.068386   0.066136   
4    27  Hand_0000003.jpg   0.091759   0.100762   0.059211   0.056267   
5    27  Hand_0000003.jpg   0.066482   0.072022   0.068213   0.097472   
6    27  Hand_0000003.jpg   0.086565   0.078947   0.067867   0.074792   
7    27  Hand_0000003.jpg   0.076004   0.076697   0.072715   0.083795   
8    27  Hand_0000003.jpg   0.056960   0.072022   0.079813   0.092625   
9    27  Hand_0000003.jpg   0.076177   0.107341   0.071676   0.066136   
10   27  Hand_0000003.jpg   0.060942   0.068040   0.070118   0.100069   
11   27  Hand_0000003.jpg   0.078774   0.083622   0.079467   0.076524   
12   27  Hand_0000003.jpg   0.068733   0.067348   0

In [60]:
merged_df.drop('imageName', axis=1, inplace=True)

In [61]:
print(f"Columns: {list(merged_df.columns)}")

Columns: ['age', 'pattern 1', 'pattern 2', 'pattern 3', 'pattern 4', 'pattern 5', 'pattern 6', 'pattern 7', 'pattern 8', 'pattern 9', 'pattern 10', 'pattern 11', 'pattern 12', 'pattern 13', 'pattern 14', 'pattern 15', 'pattern 16', 'pattern 17', 'pattern 18', 'pattern 19', 'pattern 20', 'pattern 21', 'pattern 22']


AutoML Testing

In [62]:
# --- Perform AutoML Evaluation

# Set up the dataset for AutoML regression
regression.setup(data=merged_df,
                 target="age",
                 log_experiment=True,
                 experiment_name=experiment_name,
                 session_id=random_seed,
                ) 

# Automatically train, test, and evaluate models
best_models = regression.compare_models(n_select=num_best_models, verbose=False)

Unnamed: 0,Description,Value
0,Session id,123
1,Target,age
2,Target type,Regression
3,Original data shape,"(47, 23)"
4,Transformed data shape,"(47, 23)"
5,Transformed train set shape,"(32, 23)"
6,Transformed test set shape,"(15, 23)"
7,Numeric features,22
8,Preprocess,True
9,Imputation type,simple


Traceback (most recent call last):
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 304, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 397, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1306, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1299, in _read_helper
    result = read_yaml(root, file_name)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/utils/file_utils.py", line 282, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.e

In [53]:
for model in best_models:
    print(model)
    print()

LinearRegression(n_jobs=-1)

BayesianRidge()

LGBMRegressor(n_jobs=-1, random_state=123)

Ridge(random_state=123)

ElasticNet(random_state=123)



In [54]:
# Display score table
regression.pull()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lr,Linear Regression,0.0,0.0,0.0,1.0,0.0002,0.0,0.184
br,Bayesian Ridge,0.0,0.0,0.0,1.0,0.0002,0.0,0.004
lightgbm,Light Gradient Boosting Machine,0.0,0.0,0.0,1.0,0.0002,0.0,0.008
ridge,Ridge Regression,0.0,0.0,0.0,1.0,0.0002,0.0,0.006
en,Elastic Net,0.0,0.0,0.0,1.0,0.0002,0.0,0.005
lar,Least Angle Regression,0.0,0.0,0.0,1.0,0.0002,0.0,0.005
llar,Lasso Least Angle Regression,0.0,0.0,0.0,1.0,0.0002,0.0,0.004
lasso,Lasso Regression,0.0,0.0,0.0,1.0,0.0002,0.0,0.006
omp,Orthogonal Matching Pursuit,0.0,0.0,0.0,1.0,0.0002,0.0,0.006
knn,K Neighbors Regressor,0.0,0.0,0.0,1.0,0.0002,0.0,0.009


In [56]:
# --- Linear Regression

lr_model = regression.create_model('lr')
lr_model_tuned = regression.tune_model(lr_model)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.0,0.0,0.0,1.0,0.0002,0.0
1,0.0,0.0,0.0,1.0,0.0002,0.0
2,0.0,0.0,0.0,1.0,0.0002,0.0
3,0.0,0.0,0.0,1.0,0.0002,0.0
4,0.0,0.0,0.0,1.0,0.0002,0.0
5,0.0,0.0,0.0,1.0,0.0002,0.0
6,0.0,0.0,0.0,1.0,0.0002,0.0
7,0.0,0.0,0.0,1.0,0.0002,0.0
8,0.0,0.0,0.0,1.0,0.0002,0.0
9,0.0,0.0,0.0,1.0,0.0002,0.0


Traceback (most recent call last):
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 304, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 397, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1306, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1299, in _read_helper
    result = read_yaml(root, file_name)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/utils/file_utils.py", line 282, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.e

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.0,0.0,0.0,1.0,0.0002,0.0
1,0.0,0.0,0.0,1.0,0.0002,0.0
2,0.0,0.0,0.0,1.0,0.0002,0.0
3,0.0,0.0,0.0,1.0,0.0002,0.0
4,0.0,0.0,0.0,1.0,0.0002,0.0
5,0.0,0.0,0.0,1.0,0.0002,0.0
6,0.0,0.0,0.0,1.0,0.0002,0.0
7,0.0,0.0,0.0,1.0,0.0002,0.0
8,0.0,0.0,0.0,1.0,0.0002,0.0
9,0.0,0.0,0.0,1.0,0.0002,0.0


Fitting 10 folds for each of 2 candidates, totalling 20 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Traceback (most recent call last):
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 304, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 397, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1306, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1299, in _read_helper
    result = read_yaml(root, file_name)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/utils/file_utils.py", line 282, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.e

In [57]:
# --- Bayesian Ridge

br_model = regression.create_model('br')
br_model_tuned = regression.tune_model(br_model)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.0,0.0,0.0,1.0,0.0002,0.0
1,0.0,0.0,0.0,1.0,0.0002,0.0
2,0.0,0.0,0.0,1.0,0.0002,0.0
3,0.0,0.0,0.0,1.0,0.0002,0.0
4,0.0,0.0,0.0,1.0,0.0002,0.0
5,0.0,0.0,0.0,1.0,0.0002,0.0
6,0.0,0.0,0.0,1.0,0.0002,0.0
7,0.0,0.0,0.0,1.0,0.0002,0.0
8,0.0,0.0,0.0,1.0,0.0002,0.0
9,0.0,0.0,0.0,1.0,0.0002,0.0


Traceback (most recent call last):
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 304, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 397, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1306, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1299, in _read_helper
    result = read_yaml(root, file_name)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/utils/file_utils.py", line 282, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.e

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.0,0.0,0.0,1.0,0.0002,0.0
1,0.0,0.0,0.0,1.0,0.0002,0.0
2,0.0,0.0,0.0,1.0,0.0002,0.0
3,0.0,0.0,0.0,1.0,0.0002,0.0
4,0.0,0.0,0.0,1.0,0.0002,0.0
5,0.0,0.0,0.0,1.0,0.0002,0.0
6,0.0,0.0,0.0,1.0,0.0002,0.0
7,0.0,0.0,0.0,1.0,0.0002,0.0
8,0.0,0.0,0.0,1.0,0.0002,0.0
9,0.0,0.0,0.0,1.0,0.0002,0.0


Fitting 10 folds for each of 10 candidates, totalling 100 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Traceback (most recent call last):
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 304, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 397, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1306, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1299, in _read_helper
    result = read_yaml(root, file_name)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/utils/file_utils.py", line 282, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.e