In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing  import LabelEncoder
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
import pandas as pd
import numpy as np
import xgboost as xgb


# Load Data & Encoding Label

In [2]:
df_features = pd.read_csv('allNDM_M.csv')

In [3]:
label_encoder = LabelEncoder() 
# Encode labels in column 'species'. 
df_features['Label']= label_encoder.fit_transform(df_features['Label']) 

## ONLY COLOR

In [4]:
df_rgb = df_features.filter(items=['RGB_R_Mean', 'RGB_R_StdDev', 'RGB_R_Skewness', 'RGB_G_Mean', 'RGB_G_StdDev',
 'RGB_G_Skewness', 'RGB_B_Mean', 'RGB_B_StdDev', 'RGB_B_Skewness', 'Label'])

df_hsv = df_features.filter(items=['HSV_R_Mean', 'HSV_R_StdDev', 'HSV_R_Skewness', 'HSV_G_Mean', 'HSV_G_StdDev',
 'HSV_G_Skewness', 'HSV_B_Mean', 'HSV_B_StdDev', 'HSV_B_Skewness','Label'])
                                   
df_lab = df_features.filter(items=['Lab_R_Mean', 'Lab_R_StdDev', 'Lab_R_Skewness', 'Lab_G_Mean', 'Lab_G_StdDev',
 'Lab_G_Skewness', 'Lab_B_Mean', 'Lab_B_StdDev', 'Lab_B_Skewness','Label'])

## ONLY SHAPE

In [5]:
df_bitwhite = df_features.filter(items=['Area', 'Perimeter', 'Solidity', 'Circularity', 'Diameter','Label'])
df_humoment = df_features.filter(items=['Hu_0', 'Hu_1', 'Hu_2', 'Hu_3', 'Hu_4', 'Hu_5', 'Hu_6','Label'])

## ONLY TEXTURE

In [6]:
df_fractal = df_features.filter(items=['FractalDimension','Label'])
df_glcm = df_features.filter(items=[ 'Energy', 'Entropy', 'Contrast', 'Correlation', 'Label'])

## LAB + SHAPE

In [7]:
df_lab_bitwhite = df_features.filter(items=['Lab_R_Mean', 'Lab_R_StdDev', 'Lab_R_Skewness', 'Lab_G_Mean', 'Lab_G_StdDev',
 'Lab_G_Skewness', 'Lab_B_Mean', 'Lab_B_StdDev', 'Lab_B_Skewness','Area', 
  'Perimeter', 'Solidity', 'Circularity', 'Diameter','Label'])

df_lab_humoment = df_features.filter(items=['Lab_R_Mean', 'Lab_R_StdDev', 'Lab_R_Skewness', 'Lab_G_Mean', 'Lab_G_StdDev',
 'Lab_G_Skewness', 'Lab_B_Mean', 'Lab_B_StdDev', 'Lab_B_Skewness','Hu_0', 'Hu_1', 'Hu_2', 'Hu_3', 'Hu_4', 'Hu_5',
 'Hu_6','Label'])

## LAB + TEXTURE

In [8]:
df_lab_fractal = df_features.filter(items=['Lab_R_Mean', 'Lab_R_StdDev', 'Lab_R_Skewness', 'Lab_G_Mean', 'Lab_G_StdDev',
 'Lab_G_Skewness', 'Lab_B_Mean', 'Lab_B_StdDev', 'Lab_B_Skewness','FractalDimension','Label'])

df_lab_glcm  = df_features.filter(items=['Lab_R_Mean', 'Lab_R_StdDev', 'Lab_R_Skewness', 'Lab_G_Mean', 'Lab_G_StdDev',
 'Lab_G_Skewness', 'Lab_B_Mean', 'Lab_B_StdDev', 'Lab_B_Skewness','Energy', 'Entropy', 'Contrast', 'Correlation', 'Label'])

## LAB + SHPAE + TEXTURE

In [9]:
df_lab_bitwhite_fractal = df_features.filter(items=['Lab_R_Mean', 'Lab_R_StdDev', 'Lab_R_Skewness', 'Lab_G_Mean', 'Lab_G_StdDev',
 'Lab_G_Skewness', 'Lab_B_Mean', 'Lab_B_StdDev', 'Lab_B_Skewness','Area', 
  'Perimeter', 'Solidity', 'Circularity', 'Diameter','FractalDimension','Label'])

df_lab_bitwhite_glcm = df_features.filter(items=['Lab_R_Mean', 'Lab_R_StdDev', 'Lab_R_Skewness', 'Lab_G_Mean', 'Lab_G_StdDev',
 'Lab_G_Skewness', 'Lab_B_Mean', 'Lab_B_StdDev', 'Lab_B_Skewness','Area', 
  'Perimeter', 'Solidity', 'Circularity', 'Diameter','Energy', 'Entropy', 'Contrast', 'Correlation', 'Label'])

In [10]:
df_lab_humoment_fractal = df_features.filter(items=['Lab_R_Mean', 'Lab_R_StdDev', 'Lab_R_Skewness', 'Lab_G_Mean', 'Lab_G_StdDev',
 'Lab_G_Skewness', 'Lab_B_Mean', 'Lab_B_StdDev', 'Lab_B_Skewness','Hu_0', 'Hu_1', 'Hu_2', 'Hu_3', 'Hu_4', 'Hu_5',
 'Hu_6','FractalDimension','Label'])

df_lab_humoment_glcm = df_features.filter(items=['Lab_R_Mean', 'Lab_R_StdDev', 'Lab_R_Skewness', 'Lab_G_Mean', 'Lab_G_StdDev',
 'Lab_G_Skewness', 'Lab_B_Mean', 'Lab_B_StdDev', 'Lab_B_Skewness','Hu_0', 'Hu_1', 'Hu_2', 'Hu_3', 'Hu_4', 'Hu_5',
 'Hu_6','Energy', 'Entropy', 'Contrast', 'Correlation', 'Label'])

## LAB+SHAPE

In [11]:
df_list = [df_rgb, df_hsv, df_lab,df_bitwhite,df_humoment,df_fractal,df_glcm]
# Dictionary of models for easier iteration
dataset = {
     "RGB": df_rgb,
     "HSV": df_hsv,
     "LAB": df_lab,
     "BitWhite": df_bitwhite,
     "Hu Moment": df_humoment,
     "Fractal Dimension": df_fractal,
     "GLCM": df_glcm,
    "LAB + Bitwhite":df_lab_bitwhite,
    "LAB + Hu Moment":df_lab_humoment,
    "LAB + Fractal Dimension":df_lab_fractal,
    "LAB + GLCM":df_lab_glcm ,
    "LAB + Bitwhite + Fractal Dimension":df_lab_bitwhite_fractal,
    "LAB + Bitwhite + GLCM":df_lab_bitwhite_glcm,
    "LAB + Hu Moment + Fractal Dimension":df_lab_humoment_fractal,
    "LAB + Hu Moment + GLCM":df_lab_humoment_glcm,
    
}

# Classification

In [12]:
for name, features_df in dataset.items():
    # Assuming 'features_df' is a DataFrame containing all the extracted features and labels
    X = features_df.drop('Label', axis=1)  # Feature matrix
    y = features_df['Label']  # Labels
    # Splitting the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Standardizing the features (important for models like SVM and KNN)
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.fit_transform(X_test)
    
    # Dictionary of models for easier iteration
    models = {
        "Decision Tree": DecisionTreeClassifier(random_state=42),
        "Random Forest": RandomForestClassifier(random_state=42),
        "Gradient Boosting": GradientBoostingClassifier(random_state=42),
        "Extra Trees": ExtraTreesClassifier(random_state=42),
       # "K-Nearest Neighbors": KNeighborsClassifier(),
       # "Support Vector Machine": SVC(random_state=42),
        "XGBoost": xgb.XGBClassifier(random_state=42)
        #kernel='linear', C=1
        #learning_rate=0.1, max_depth=3,  use_label_encoder=False,eval_metric='logloss'
    }
    print("Feature extraction:: ",name)
    # Training and evaluating each model
    for name, model in models.items():
        model.fit(X_train_scaled, y_train)
        y_pred = model.predict(X_test_scaled)
        accuracy = accuracy_score(y_test, y_pred)
        print(f"{name} Accuracy:\t  {accuracy:.4f}")
    print('\n')


Feature extraction::  RGB
Decision Tree Accuracy:	  0.6700
Random Forest Accuracy:	  0.8600
Gradient Boosting Accuracy:	  0.7900
Extra Trees Accuracy:	  0.8500
XGBoost Accuracy:	  0.8300


Feature extraction::  HSV
Decision Tree Accuracy:	  0.6000
Random Forest Accuracy:	  0.7700
Gradient Boosting Accuracy:	  0.6900
Extra Trees Accuracy:	  0.7800
XGBoost Accuracy:	  0.6800


Feature extraction::  LAB
Decision Tree Accuracy:	  0.6000
Random Forest Accuracy:	  0.8100
Gradient Boosting Accuracy:	  0.7600
Extra Trees Accuracy:	  0.8600
XGBoost Accuracy:	  0.7900


Feature extraction::  BitWhite
Decision Tree Accuracy:	  0.3100
Random Forest Accuracy:	  0.3500
Gradient Boosting Accuracy:	  0.4000
Extra Trees Accuracy:	  0.3600
XGBoost Accuracy:	  0.3300


Feature extraction::  Hu Moment
Decision Tree Accuracy:	  0.5500
Random Forest Accuracy:	  0.4700
Gradient Boosting Accuracy:	  0.4500
Extra Trees Accuracy:	  0.4100
XGBoost Accuracy:	  0.4300


Feature extraction::  Fractal Dimension
Deci

# 5-Folds Cross Validation

In [19]:
for name, features_df in dataset.items():
    # Assuming 'features_df' is a DataFrame containing all the extracted features and labels
    X = features_df.drop('Label', axis=1)  # Feature matrix
    y = features_df['Label']  # Labels

    # Standardizing the features (important for models like SVM and KNN)
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)  # Scale the entire dataset
    
    # Dictionary of models for easier iteration
    models = {
        "Decision Tree": DecisionTreeClassifier(random_state=42),
        "Random Forest": RandomForestClassifier(random_state=42),
        "Gradient Boosting": GradientBoostingClassifier(random_state=42),
        "Extra Trees": ExtraTreesClassifier(random_state=42),
       # "K-Nearest Neighbors": KNeighborsClassifier(),
       # "Support Vector Machine": SVC(random_state=42),
        "XGBoost": xgb.XGBClassifier(random_state=42)
        #kernel='linear', C=1
        #learning_rate=0.1, max_depth=3,  use_label_encoder=False,eval_metric='logloss'
    }
    
    print("Feature extraction:: ",name)
    # Apply 5-fold cross-validation and print accuracy for each model
    for name, model in models.items():
        cv_scores = cross_val_score(model, X_scaled, y, cv=5)
        print(f"{name} Accuracy:\t  {np.mean(cv_scores):.4f} ± {np.std(cv_scores):.4f}")
    print('\n')
   


Feature extraction::  RGB
Decision Tree Accuracy:	  0.6620 ± 0.0788
Random Forest Accuracy:	  0.7560 ± 0.0954
Gradient Boosting Accuracy:	  0.7580 ± 0.0739
Extra Trees Accuracy:	  0.7620 ± 0.0873
XGBoost Accuracy:	  0.7440 ± 0.0811


Feature extraction::  HSV
Decision Tree Accuracy:	  0.6000 ± 0.0780
Random Forest Accuracy:	  0.6960 ± 0.0786
Gradient Boosting Accuracy:	  0.6900 ± 0.0888
Extra Trees Accuracy:	  0.7120 ± 0.0788
XGBoost Accuracy:	  0.6720 ± 0.0913


Feature extraction::  LAB
Decision Tree Accuracy:	  0.7500 ± 0.0986
Random Forest Accuracy:	  0.7900 ± 0.1077
Gradient Boosting Accuracy:	  0.7860 ± 0.0913
Extra Trees Accuracy:	  0.7820 ± 0.0974
XGBoost Accuracy:	  0.7640 ± 0.0985


Feature extraction::  BitWhite
Decision Tree Accuracy:	  0.4080 ± 0.0312
Random Forest Accuracy:	  0.4080 ± 0.0319
Gradient Boosting Accuracy:	  0.4020 ± 0.0519
Extra Trees Accuracy:	  0.3980 ± 0.0331
XGBoost Accuracy:	  0.4040 ± 0.0265


Feature extraction::  Hu Moment
Decision Tree Accuracy:	  0

# GridSearch - Extra Tree

In [16]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

In [18]:
df_lab_fractal
# Assuming 'features_df' is a DataFrame containing all the extracted features and labels
X = df_lab_fractal.drop('Label', axis=1)  # Feature matrix
y = df_lab_fractal['Label']  # Labels
# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardizing the features (important for models like SVM and KNN)
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)
    
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None,2, 10, 20, 30],
    'min_samples_split': [2, 5, 10,20],  # Minimum number of samples required to split an internal node
   #'min_samples_leaf': [1, 2,3, 4]  # Minimum number of samples required to be at a leaf node
}

# Initialize the model
extra_trees = ExtraTreesClassifier(random_state=42)

# Set up Grid Search
grid_search = GridSearchCV(estimator=extra_trees, param_grid=param_grid, cv=5 , verbose=2, n_jobs=-1)

grid_search.fit(X_train_scaled, y_train)  # Assuming you have training data X_train and labels y_train
best_params = grid_search.best_params_
print("Best parameters:", best_params)

best_model = grid_search.best_estimator_

y_pred = best_model.predict(X_test_scaled)  # Assuming you have test data X_test
accuracy = accuracy_score(y_test, y_pred)  # Assuming you have the true labels y_test
print(f"Accuracy of the optimized model: {accuracy:.4f}")
#print(f"{name} Accuracy:\t  {accuracy:.4f}")


Fitting 5 folds for each of 60 candidates, totalling 300 fits
Best parameters: {'max_depth': None, 'min_samples_split': 10, 'n_estimators': 200}
Accuracy of the optimized model: 0.8700
