In [1]:
import numpy as np
import matplotlib.pyplot as plt

from utils import *

from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

In [2]:
data_dir = "../Data_Cropped_and_Resized"

### Feature Extraction

In [3]:
train_path = f"{data_dir}/train"

train_imgs, train_sobel_edges, train_labels = extract_features(train_path, detect_edges_sobel)
_, train_hounsfield_edges, _ = extract_features(train_path, apply_hounsfield_units)
_, train_threshold_edges, _ = extract_features(train_path, threshold_image)
_, train_canny_edges, _ = extract_features(train_path, detect_canny_edges)
_, train_soft_tissue, _ = extract_features(train_path, get_soft_tissue)

In [4]:
valid_path = f"{data_dir}/valid"

valid_imgs, valid_sobel_edges, valid_labels = extract_features(valid_path, detect_edges_sobel)
_, valid_hounsfield_edges, _ = extract_features(valid_path, apply_hounsfield_units)
_, valid_threshold_edges, _ = extract_features(valid_path, threshold_image)
_, valid_canny_edges, _ = extract_features(valid_path, detect_canny_edges)
_, valid_soft_tissue, _ = extract_features(valid_path, get_soft_tissue)

In [5]:
# dimensionality reduction

### individual edge features
input_features = np.array([[img.flatten() for img in train_sobel_edges],
                            [img.flatten() for img in train_hounsfield_edges],
                            [img.flatten() for img in train_threshold_edges],
                            [img.flatten() for img in train_canny_edges]])

X_train_sobel_pca, X_train_hounsfield_pca, X_train_thresh_pca, X_train_canny_pca = get_PCA(input_features, n_components=2)[-1]
X_train_sobel_tsne, X_train_hounsfield_tsne, X_train_thresh_tsne, X_train_canny_tsne = get_tsne(input_features, n_components=2)

input_features = np.array([[img.flatten() for img in valid_sobel_edges],
                            [img.flatten() for img in valid_hounsfield_edges],
                            [img.flatten() for img in valid_threshold_edges],
                            [img.flatten() for img in valid_canny_edges]])
X_val_sobel_pca, X_val_hounsfield_pca, X_val_thresh_pca, X_val_canny_pca = get_PCA(input_features, n_components=2)[-1]
X_val_sobel_tsne, X_val_hounsfield_tsne, X_val_thresh_tsne, X_val_canny_tsne = get_tsne(input_features, n_components=2)

### combined features
combined_train_features = np.array([[np.array([w, x, y, z]).flatten() for w, x, y, z in zip(train_sobel_edges, train_hounsfield_edges, train_threshold_edges, train_canny_edges)]])
X_train_combined_pca = get_PCA(combined_train_features, n_components=2)[-1][0]
X_train_combined_tsne = get_tsne(combined_train_features, n_components=2)[0]

combined_val_features = np.array([[np.array([w, x, y, z]).flatten() for w, x, y, z in zip(valid_sobel_edges, valid_hounsfield_edges, valid_threshold_edges, valid_canny_edges)]])
X_val_combined_pca = get_PCA(combined_val_features, n_components=2)[-1][0]
X_val_combined_tsne = get_tsne(combined_val_features, n_components=2)[0]

## Grid Parameter search

### DecisionTree

In [7]:
# hyperparameter grid to search through
param_grid = {
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

dt_classifier = DecisionTreeClassifier()

X_train = np.array([img.flatten() for img in train_soft_tissue])
y_train = train_labels


In [8]:
# hyperparameter search on the training data
grid_search = GridSearchCV(dt_classifier, param_grid, scoring='accuracy', n_jobs=-1)

grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

In [9]:
X_val = np.array([img.flatten() for img in valid_soft_tissue])
y_val = valid_labels

In [13]:
print("Best Hyperparameters:", best_params)

y_pred = best_model.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print("Best Model Accuracy:", accuracy)

Best Hyperparameters: {'criterion': 'gini', 'max_depth': None, 'min_samples_leaf': 4, 'min_samples_split': 2, 'splitter': 'random'}
Best Model Accuracy: 0.5694444444444444


### SVM

In [6]:
param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'rbf', 'sigmoid'],
    'gamma': [0.1, 1, 10, 100]
}

svm_model = SVC()

X_train = np.array([img.flatten() for img in train_soft_tissue])
y_train = train_labels

In [7]:
grid_search = GridSearchCV(svm_model, param_grid, cv=5)

# Fit the grid search to the data
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

In [9]:
X_val = np.array([img.flatten() for img in valid_soft_tissue])
y_val = valid_labels

In [10]:
print("Best Hyperparameters:", best_params)

y_pred = best_model.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print("Best Model Accuracy:", accuracy)

Best Hyperparameters: {'C': 0.1, 'gamma': 0.1, 'kernel': 'linear'}
Best Model Accuracy: 0.8055555555555556
