## 2023 06/16 REMBG and LBP AutoML Experimentation

*Last Updated*: 2023-09-26

### Authors
* Hannah Zhang (hannahzhang@ucsb.edu)


### Overview
This Jupyter notebook is intended to demonstrate

* REMBG (removed bg) processed images with lbp feature extraction results from automl tests


### Key Results

- AdaBoost Regressor, Extra Trees Regressor, and Gradient Boosting Machine perform the best
- Dummy Regressor did not perform well

In [None]:
# --- Imports

# External packages
from pycaret import regression
from pycaret.datasets import get_data
import os
import cv2
from dermaml import data, features
import PIL
from PIL import Image
import numpy as np
import mlflow
import matplotlib.pyplot as plt
import pandas as pd
import skimage
import pandas as pd

In [104]:
# Dataset
dataset_name = "11khands"

# AutoML
experiment_name = "11khands-automl-lbp-features"
num_best_models = 5
random_seed = 123  # seed used for random number generators to ensure reproducibility of results in this notebook

In [105]:
image_folder = '/Users/hannahzhang/Downloads/11khands_test_ims_2/'
image_files = os.listdir(image_folder)

images = []
for filename in image_files:
    img = cv2.imread(os.path.join(image_folder, filename))
    if img is not None:
        images.append(img)
        print(filename)

Hand_0000003.jpg
Hand_0000002.jpg
Hand_0003093.jpg
Hand_0009409.jpg
Hand_0001045.jpg
Hand_0003097.jpg
Hand_0000005.jpg
Hand_0000004.jpg
Hand_0003096.jpg
Hand_0001046.jpg
Hand_0003094.jpg
Hand_0000006.jpg
Hand_0003095.jpg
Hand_0001047.jpg
Hand_0001803.jpg
Hand_0000880.jpg
Hand_0001807.jpg
Hand_0001806.jpg
Hand_0000878.jpg
Hand_0001757.jpg
Hand_0001804.jpg
Hand_0001805.jpg
Hand_0000879.jpg
Hand_0000135.jpg
Hand_0001015.jpg
Hand_0009313.jpg
Hand_0000082.jpg
Hand_0000083.jpg
Hand_0009312.jpg
Hand_0001014.jpg
Hand_0000134.jpg
Hand_0000877.jpg
Hand_0001016.jpg
Hand_0009310.jpg
Hand_0001758.jpg
Hand_0000081.jpg
Hand_0000080.jpg
Hand_0001759.jpg
Hand_0009311.jpg
Hand_0000876.jpg
Hand_0000133.jpg
Hand_0001013.jpg
Hand_0001761.jpg
Hand_0000084.jpg
Hand_0001760.jpg
Hand_0001012.jpg
Hand_0009314.jpg
Hand_0000132.jpg
Hand_0001049.jpg
Hand_0001048.jpg
Hand_0000140.jpg
Hand_0009410.jpg
Hand_0009411.jpg
Hand_0009413.jpg
Hand_0009412.jpg


In [106]:
removed_ims = []

for im in images:
    output = data.remove(im)
    gray_image = cv2.cvtColor(output, cv2.COLOR_RGBA2GRAY)
    removed_ims.append(gray_image)

In [81]:
lbp_features = []

for im, filename in zip(removed_ims, image_files):
    lbp = features.extract_features(im)
    lbp_features.append({
        'values': lbp,
        'filename': filename
        })

In [82]:
print(lbp_features)

[{'values': {'texture': [0.01663125, 0.017653645833333332, 0.018042708333333334, 0.0242671875, 0.050145833333333334, 0.051359375, 0.025117708333333332, 0.019573958333333332, 0.016031770833333334, 0.7065171875, 0.054659375]}, 'filename': 'Hand_0000003.jpg'}, {'values': {'texture': [0.016026041666666668, 0.016641666666666666, 0.015564583333333333, 0.023136458333333332, 0.0521953125, 0.0536234375, 0.0249765625, 0.018111458333333334, 0.0154671875, 0.7104671875, 0.053790104166666665]}, 'filename': 'Hand_0000002.jpg'}, {'values': {'texture': [0.010753645833333334, 0.012133333333333333, 0.011615104166666666, 0.014965104166666666, 0.028797395833333333, 0.028936979166666668, 0.013913020833333333, 0.0111828125, 0.011207291666666667, 0.8178072916666667, 0.038688020833333336]}, 'filename': 'Hand_0003093.jpg'}, {'values': {'texture': [0.015650520833333334, 0.017634895833333334, 0.012905208333333333, 0.014136458333333332, 0.026202604166666667, 0.025794270833333334, 0.0123734375, 0.011325520833333333

Load in CSV

In [88]:
csv_file = '/Users/hannahzhang/Downloads/11khands_lbp.csv'

# Read the CSV file
original_df = pd.read_csv(csv_file)

In [112]:
# Initialize empty lists to store column data
pattern_columns = []
filename_column = []

# Iterate through the list of dictionaries
for entry in lbp_features:
    texture_values = entry['values']['texture']  # Extract texture values
    filename = entry['filename']  # Extract filename

    filename_column.append(filename)
    pattern_columns.append(texture_values)

# Create a DataFrame from the extracted data
lbp_df = pd.DataFrame(pattern_columns)

# Rename columns to "Pattern 1", "Pattern 2", ...
lbp_df.columns = [f'Pattern {i}' for i in range(1, len(lbp_df.columns) + 1)]

# Add the "filename" column
lbp_df['filename'] = filename_column

# Reorder columns to have 'filename' as the first column
lbp_df = lbp_df[['filename'] + [col for col in lbp_df.columns if col != 'filename']]

# Print the resulting DataFrame
print(lbp_df)


            filename  Pattern 1  Pattern 2  Pattern 3  Pattern 4  Pattern 5  \
0   Hand_0000003.jpg   0.016631   0.017654   0.018043   0.024267   0.050146   
1   Hand_0000002.jpg   0.016026   0.016642   0.015565   0.023136   0.052195   
2   Hand_0003093.jpg   0.010754   0.012133   0.011615   0.014965   0.028797   
3   Hand_0009409.jpg   0.015651   0.017635   0.012905   0.014136   0.026203   
4   Hand_0001045.jpg   0.016746   0.016859   0.012874   0.016634   0.038664   
5   Hand_0003097.jpg   0.011199   0.012784   0.012226   0.016108   0.031992   
6   Hand_0000005.jpg   0.014219   0.015403   0.016710   0.022774   0.048099   
7   Hand_0000004.jpg   0.014446   0.015618   0.016826   0.022946   0.048051   
8   Hand_0003096.jpg   0.011734   0.013099   0.012460   0.016200   0.032989   
9   Hand_0001046.jpg   0.019440   0.021628   0.016495   0.018831   0.035630   
10  Hand_0003094.jpg   0.011172   0.012675   0.011998   0.015637   0.030811   
11  Hand_0000006.jpg   0.014614   0.016050   0.01671

In [113]:
# Set the image filenames as the index in the new GLCM DataFrame
lbp_df.set_index('filename', inplace=True)

In [115]:
# Merging DataFrames with different column names
merged_data = original_df.merge(lbp_df, left_on='imageName', right_on='filename', how='inner')

# Save the merged DataFrame back to the CSV file
merged_data.to_csv(csv_file, index=False)

In [116]:
print(merged_data)

    age         imageName  Pattern 1  Pattern 2  Pattern 3  Pattern 4  \
0    27  Hand_0000002.jpg   0.016026   0.016642   0.015565   0.023136   
1    27  Hand_0000003.jpg   0.016631   0.017654   0.018043   0.024267   
2    27  Hand_0000004.jpg   0.014446   0.015618   0.016826   0.022946   
3    27  Hand_0000005.jpg   0.014219   0.015403   0.016710   0.022774   
4    27  Hand_0000006.jpg   0.014614   0.016050   0.016719   0.022909   
5    20  Hand_0000080.jpg   0.021762   0.021106   0.013084   0.013300   
6    20  Hand_0000081.jpg   0.019444   0.019723   0.011416   0.012176   
7    20  Hand_0000082.jpg   0.022606   0.021019   0.011695   0.011970   
8    20  Hand_0000083.jpg   0.023087   0.021656   0.011527   0.010463   
9    20  Hand_0000084.jpg   0.024003   0.022706   0.011515   0.010337   
10   21  Hand_0000132.jpg   0.012551   0.013757   0.010464   0.012228   
11   21  Hand_0000133.jpg   0.013990   0.013776   0.011308   0.014524   
12   21  Hand_0000134.jpg   0.011115   0.011368   0

In [117]:
merged_data.drop(columns=['imageName'], inplace=True)

In [118]:
print(merged_data)

    age  Pattern 1  Pattern 2  Pattern 3  Pattern 4  Pattern 5  Pattern 6  \
0    27   0.016026   0.016642   0.015565   0.023136   0.052195   0.053623   
1    27   0.016631   0.017654   0.018043   0.024267   0.050146   0.051359   
2    27   0.014446   0.015618   0.016826   0.022946   0.048051   0.048166   
3    27   0.014219   0.015403   0.016710   0.022774   0.048099   0.047478   
4    27   0.014614   0.016050   0.016719   0.022909   0.051197   0.052437   
5    20   0.021762   0.021106   0.013084   0.013300   0.021371   0.022471   
6    20   0.019444   0.019723   0.011416   0.012176   0.023435   0.024037   
7    20   0.022606   0.021019   0.011695   0.011970   0.022080   0.023729   
8    20   0.023087   0.021656   0.011527   0.010463   0.019799   0.020452   
9    20   0.024003   0.022706   0.011515   0.010337   0.019518   0.020229   
10   21   0.012551   0.013757   0.010464   0.012228   0.024340   0.022273   
11   21   0.013990   0.013776   0.011308   0.014524   0.035067   0.035461   

AutoML Testing

In [119]:
# --- Perform AutoML Evaluation

# Set up the dataset for AutoML regression
regression.setup(data=merged_data,
                 target="age",
                 log_experiment=True,
                 experiment_name=experiment_name,
                 session_id=random_seed,
                ) 

# Automatically train, test, and evaluate models
best_models = regression.compare_models(n_select=num_best_models, verbose=False)

Unnamed: 0,Description,Value
0,Session id,123
1,Target,age
2,Target type,Regression
3,Original data shape,"(55, 12)"
4,Transformed data shape,"(55, 12)"
5,Transformed train set shape,"(38, 12)"
6,Transformed test set shape,"(17, 12)"
7,Numeric features,11
8,Preprocess,True
9,Imputation type,simple


Traceback (most recent call last):
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 304, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 397, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1306, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1299, in _read_helper
    result = read_yaml(root, file_name)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/utils/file_utils.py", line 282, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.e

In [120]:
for model in best_models:
    print(model)
    print()

AdaBoostRegressor(random_state=123)

ExtraTreesRegressor(n_jobs=-1, random_state=123)

GradientBoostingRegressor(random_state=123)

RandomForestRegressor(n_jobs=-1, random_state=123)

DecisionTreeRegressor(random_state=123)



In [121]:
# Display score table
regression.pull()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
ada,AdaBoost Regressor,3.1606,40.145,4.9795,0.764,0.1256,0.0925,0.021
et,Extra Trees Regressor,5.7612,95.0728,7.3759,0.4965,0.1636,0.1468,0.028
gbr,Gradient Boosting Regressor,6.9443,149.9929,9.8558,0.2619,0.2251,0.1799,0.021
rf,Random Forest Regressor,8.0807,145.6056,10.3257,0.2009,0.2435,0.2103,0.028
dt,Decision Tree Regressor,6.8167,185.6167,10.6475,-0.0366,0.2777,0.222,0.017
knn,K Neighbors Regressor,9.64,193.5973,12.5601,-0.3053,0.2654,0.2334,0.019
par,Passive Aggressive Regressor,12.9408,292.4415,15.5275,-0.5057,0.3635,0.32,0.026
huber,Huber Regressor,12.511,327.9345,15.9549,-0.5596,0.3823,0.3087,0.019
omp,Orthogonal Matching Pursuit,12.8625,289.5857,15.955,-0.8319,0.383,0.3503,0.017
br,Bayesian Ridge,13.6511,302.8428,16.3449,-0.9474,0.3935,0.3722,0.018


In [122]:
# --- AdaBoost Regressor

ada_model = regression.create_model('ada')
ada_model_tuned = regression.tune_model(ada_model)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,3.8611,23.8549,4.8842,0.266,0.1591,0.1452
1,3.875,33.8125,5.8149,0.8227,0.0826,0.0517
2,0.25,0.25,0.5,0.9939,0.0162,0.0083
3,7.0417,119.5903,10.9357,0.7252,0.2061,0.1547
4,1.0,2.0,1.4142,0.9857,0.0489,0.037
5,5.7115,121.179,11.0081,0.6523,0.3465,0.2692
6,5.8333,71.0556,8.4294,0.7851,0.2099,0.1535
7,0.25,0.25,0.5,0.998,0.0162,0.0083
8,3.0833,28.5208,5.3405,0.414,0.1368,0.0717
9,0.7,0.9367,0.9678,0.9976,0.0335,0.0255


Traceback (most recent call last):
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 304, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 397, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1306, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1299, in _read_helper
    result = read_yaml(root, file_name)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/utils/file_utils.py", line 282, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.e

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,2.2225,9.8135,3.1326,0.698,0.1288,0.0992
1,14.6716,498.804,22.3339,-1.6158,0.4216,0.2014
2,0.25,0.25,0.5,0.9939,0.0162,0.0083
3,6.9844,119.3135,10.9231,0.7258,0.2051,0.1526
4,0.7778,1.2099,1.0999,0.9914,0.0384,0.0288
5,20.976,736.9975,27.1477,-1.1148,0.6553,0.6051
6,4.25,39.25,6.265,0.8813,0.1681,0.1259
7,0.267,0.183,0.4278,0.9985,0.0154,0.01
8,4.9048,46.0034,6.7826,0.0547,0.1782,0.1193
9,0.7381,0.8248,0.9082,0.9979,0.0312,0.0265


Fitting 10 folds for each of 10 candidates, totalling 100 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


Traceback (most recent call last):
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 304, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 397, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1306, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1299, in _read_helper
    result = read_yaml(root, file_name)
  File "/Applications/DermaML/.venv/lib/python3.10/site-packages/mlflow/utils/file_utils.py", line 282, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.e