## 2023 06/16 Multi Cropped Palm and LBP AutoML Experimentation

*Last Updated*: 2023-09-26

### Authors
* Hannah Zhang (hannahzhang@ucsb.edu)


### Overview
This Jupyter notebook is intended to demonstrate

* Mediapipe multi palm cropped images with lbp feature extraction results from automl tests


### Key Results

- Linear Regressor, Bayesian Ridge, and Light Gradient Boosting Machine performed the best
- Dummy Regressor did not perform well as compared to experiments with lbp features from single cropped palm -> more lbp features per hand leads to better results with automl

In [1]:
# --- Imports

# External packages
from pycaret import regression
from pycaret.datasets import get_data
import os
import cv2
from dermaml import data, features
import PIL
from PIL import Image
import numpy as np
import mlflow
import matplotlib.pyplot as plt
import pandas as pd
import skimage
import pandas as pd
import mediapipe as mp


In [2]:
dataset_name = "11khands"

experiment_name = "multi-cropped 11khands with lbp"

num_best_models = 5
random_seed = 345

In [3]:
# Dataset
dataset_name = "11khands"

# AutoML
experiment_name = "11khands-automl-sample-test"
num_best_models = 5
random_seed = 123  # seed used for random number generators to ensure reproducibility of results in this notebook

In [4]:
def load_image_paths(folder_path):
    image_paths = []
    for root, dirs, files in os.walk(folder_path):
        for file in files:
            if file.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff')):
                # Create the full path to the image file
                image_path = os.path.join(root, file)
                image_paths.append(image_path)
    return image_paths

# Example usage:
image_folder = '/Users/hannahzhang/Downloads/11khands_test_ims_2/'
image_paths = load_image_paths(image_folder)

# Print the list of image paths
print(image_paths)

[]


In [5]:
results = []
for path in image_paths: 
    result = data.multi_crop_palm(path)
    if result is not None:
        results.append(result)

In [6]:
# Iterate through the list of dictionaries
for result in results:
    # Iterate through the dictionary to access image filenames and sub-dictionaries
    for image_filename, image_dict in result.items():
        # Iterate through the sub-dictionary containing the images
        for key, image in image_dict.items():
            # Check if the key is 'Image 9' or 'Image 10' to process images
            if key == 'Image 9' or key == 'Image 10':
                # Convert the image to grayscale
                grayscale_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

                # Update the sub-dictionary with the grayscale image
                image_dict[key] = grayscale_image

In [7]:
# Initialize an empty list to store LBP features
lbp_features = []

# Iterate through the list of dictionaries containing images
for image_dict in results:
    # Extract the image filenames
    image_filenames = list(image_dict.keys())
    
    # Iterate through the sub-dictionaries containing the images
    for image_filename, image_data in image_dict.items():
        # Extract the images from the sub-dictionary
        image_9 = image_data['Image 9']
        image_10 = image_data['Image 10']

        # Apply LBP feature extraction to images
        lbp_9 = features.extract_features(image_9)
        lbp_10 = features.extract_features(image_10)

        # Append the LBP features and filenames to the lbp_features list
        lbp_features.append({
            'landmark_9': lbp_9,
            'landmark_10': lbp_10,
            'filename': image_filename
        })


In [8]:
len(lbp_features)

0

In [9]:
print(lbp_features)

[]


In [10]:
# Create an empty DataFrame
lbp_df = pd.DataFrame()

# Iterate through the list of dictionaries and populate the DataFrame
for i, item in enumerate(lbp_features):
    filename = item['filename']
    landmark_9_values = item['landmark_9']['texture']
    landmark_10_values = item['landmark_10']['texture']

    # Create columns for landmark 9 values
    for j, value in enumerate(landmark_9_values):
        column_name = f'pattern {j + 1}'
        lbp_df.at[i, column_name] = value

    # Create columns for landmark 10 values
    for j, value in enumerate(landmark_10_values):
        column_name = f'pattern {j + 1 + len(landmark_9_values)}'
        lbp_df.at[i, column_name] = value

# Add the filename column
lbp_df['filename'] = lbp_features[0]['filename']

# Reorder the columns
column_order = ['filename'] + [f'pattern {i + 1}' for i in range(len(lbp_df.columns) - 1)]
lbp_df = lbp_df[column_order]

print(lbp_df)


IndexError: list index out of range

Load in CSV File

In [None]:
csv_file = '/Users/hannahzhang/Downloads/multi_cropped_lbp_automl.csv'

original_df = pd.read_csv(csv_file)

In [None]:
merged_df = pd.merge(original_df, lbp_df, left_on='imageName', right_on='filename', how='inner')

# Drop filename column

merged_df.drop('filename', axis=1, inplace=True)

# Display the resulting DataFrame
print(merged_df)


In [None]:
merged_df.drop('imageName', axis=1, inplace=True)

In [None]:
print(f"Columns: {list(merged_df.columns)}")

AutoML Testing

In [None]:
# --- Perform AutoML Evaluation

# Set up the dataset for AutoML regression
regression.setup(data=merged_df,
                 target="age",
                 log_experiment=True,
                 experiment_name=experiment_name,
                 session_id=random_seed,
                ) 

# Automatically train, test, and evaluate models
best_models = regression.compare_models(n_select=num_best_models, verbose=False)

In [None]:
for model in best_models:
    print(model)
    print()

In [None]:
# Display score table
regression.pull()

In [None]:
# --- Linear Regression

lr_model = regression.create_model('lr')
lr_model_tuned = regression.tune_model(lr_model)

In [None]:
# --- Bayesian Ridge

br_model = regression.create_model('br')
br_model_tuned = regression.tune_model(br_model)