In [2]:
import numpy as np
import matplotlib.pyplot as plt

from utils import *

from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier


In [3]:
data_dir = "../Data_Cropped_and_Resized"

### Feature Extraction

In [4]:
train_path = f"{data_dir}/train"

train_imgs, train_sobel_edges, train_labels = extract_features(train_path, detect_edges_sobel)
_, train_threshold_edges, _ = extract_features(train_path, threshold_image)
_, train_canny_edges, _ = extract_features(train_path, detect_canny_edges)
_, train_soft_tissue, _ = extract_features(train_path, get_soft_tissue)

In [5]:
valid_path = f"{data_dir}/valid"

valid_imgs, valid_sobel_edges, valid_labels = extract_features(valid_path, detect_edges_sobel)
_, valid_threshold_edges, _ = extract_features(valid_path, threshold_image)
_, valid_canny_edges, _ = extract_features(valid_path, detect_canny_edges)
_, valid_soft_tissue, _ = extract_features(valid_path, get_soft_tissue)

## Grid Parameter search

### DecisionTree

In [7]:
# hyperparameter grid to search through
param_grid = {
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

dt_classifier = DecisionTreeClassifier()

X_train = np.array([img.flatten() for img in train_soft_tissue])
y_train = train_labels


In [8]:
# hyperparameter search on the training data
grid_search = GridSearchCV(dt_classifier, param_grid, scoring='accuracy', n_jobs=-1)

grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

In [9]:
X_val = np.array([img.flatten() for img in valid_soft_tissue])
y_val = valid_labels

In [13]:
print("Best Hyperparameters:", best_params)

y_pred = best_model.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print("Best Model Accuracy:", accuracy)

Best Hyperparameters: {'criterion': 'gini', 'max_depth': None, 'min_samples_leaf': 4, 'min_samples_split': 2, 'splitter': 'random'}
Best Model Accuracy: 0.5694444444444444


### SVM

In [6]:
param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'rbf', 'sigmoid'],
    'gamma': [0.1, 1, 10, 100]
}

svm_model = SVC()

X_train = np.array([img.flatten() for img in train_soft_tissue])
y_train = train_labels

In [7]:
grid_search = GridSearchCV(svm_model, param_grid, cv=5)

# Fit the grid search to the data
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

In [9]:
X_val = np.array([img.flatten() for img in valid_soft_tissue])
y_val = valid_labels

In [10]:
print("Best Hyperparameters:", best_params)

y_pred = best_model.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print("Best Model Accuracy:", accuracy)

Best Hyperparameters: {'C': 0.1, 'gamma': 0.1, 'kernel': 'linear'}
Best Model Accuracy: 0.8055555555555556


## Random Forest

In [6]:
# Define a grid of hyperparameters to search
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

rf_classifier = RandomForestClassifier(random_state=42)

X_train = np.array([img.flatten() for img in train_soft_tissue])
y_train = train_labels

In [8]:
# Create GridSearchCV instance
grid_search = GridSearchCV(rf_classifier, param_grid, cv=5, scoring='accuracy')

# Fit the grid search to the data
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

In [9]:
# Print the best parameters and best score
print("Best Parameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)

Best Parameters: {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 10, 'n_estimators': 150}
Best Score: 0.8075303212048514


In [10]:
X_val = np.array([img.flatten() for img in valid_soft_tissue])
y_val = valid_labels

In [11]:
y_pred = best_model.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print("Best Model Accuracy:", accuracy)

Best Model Accuracy: 0.6805555555555556
