# Import Libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow_datasets as tfds
import tensorflow as tf
from cv2 import cv2
from skimage import feature
from sklearn import neighbors

# Retrieve Dataset

In [2]:
from preprocess import retrieve_dataset, preprocess, to_np

# Fetch the raw train/test splits together with the dataset metadata,
# asking the loader to shuffle the input files.
dataset_splits, metadata = retrieve_dataset(should_shuffle_files=True)
train_ds_raw, test_ds_raw = dataset_splits


def _preprocess_options(target_size):
  """Build the preprocessing flag dict used for a split.

  Only 'reduce_dataset_to' varies between the splits; every other flag is the
  same for train and test (negative undersampling on, no grayscale, no
  downsampling, no normalisation).
  """
  return {
    'is_undersample_negative': True,
    'reduce_dataset_to': target_size,
    'is_grayscale': False,
    'is_downsample64': False,
    'is_downsample128': False,
    'is_normalize': False,
  }


# 300 training examples and 75 test examples.
train_ds = preprocess(train_ds_raw, _preprocess_options(300))

test_ds = preprocess(test_ds_raw, _preprocess_options(75))

def count_class(counts, batch):
    """Accumulate per-class label counts for one dataset element.

    Used as the `reduce_func` of a dataset `reduce` call: `counts` is the
    running state (a mapping keyed by class id 0..8) and `batch` is the current
    element, whose second item holds the label(s).

    Returns the same `counts` mapping after adding, for each class id, the
    number of labels in `batch` equal to that id.
    """
    batch_labels = batch[1]
    for class_id in range(9):
        # Boolean match mask -> int32 so it can be summed into the running total.
        matches = tf.cast(batch_labels == class_id, tf.int32)
        counts[class_id] = counts[class_id] + tf.reduce_sum(matches)
    return counts

# Report how many examples of each class ended up in each split.
# Every pass starts from a fresh, zeroed counter per class id (0..8).
for header, split_ds in (
    ("Class breakdown for train dataset:", train_ds),
    ("Class breakdown for test dataset:", test_ds),
):
    initial_state = {class_id: 0 for class_id in range(9)}
    counts = split_ds.reduce(initial_state=initial_state,
                             reduce_func=count_class)

    print(header)
    print([(class_id, total.numpy()) for class_id, total in counts.items()])

Class breakdown for train dataset:
[(0, 38), (1, 33), (2, 27), (3, 33), (4, 35), (5, 43), (6, 26), (7, 35), (8, 30)]
Class breakdown for test dataset:
[(0, 9), (1, 8), (2, 10), (3, 9), (4, 8), (5, 6), (6, 8), (7, 10), (8, 7)]


# Extract X_train, Y_train, X_test, Y_test

In [3]:
train_ds_numpy = to_np(train_ds)
test_ds_numpy = to_np(test_ds)

# Materialise each split exactly once before separating examples from labels.
# Previously the to_np(...) result was iterated twice per split (once for X,
# once for Y).
# NOTE(review): if to_np returns a lazily evaluated or re-shuffling iterable,
# two passes can come back in different orders and silently misalign X with Y,
# and a one-shot iterator would leave the label list empty — confirm against
# preprocess.py. A single pass is correct in every one of those cases.
train_pairs = list(train_ds_numpy)
test_pairs = list(test_ds_numpy)

# Each element is an (example, label) pair.
X_train = [example for example, _ in train_pairs]
Y_train = [label for _, label in train_pairs]

X_test = [example for example, _ in test_pairs]
Y_test = [label for _, label in test_pairs]

# Perform Transfer Learning

In [4]:
from transfer_learning import init_conv_base, extract_features
# Build the convolutional base from the first training example.
# The summary printed by this cell reports a "vgg16" model with a
# (None, 256, 256, 3) input layer.
# NOTE(review): presumably the sample is only used to infer the input shape —
# confirm in transfer_learning.py.
conv_base = init_conv_base(X_train[0])

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 256, 256, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 256, 256, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 256, 256, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 128, 128, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 128, 128, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 128, 128, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 64, 64, 128)       0     

In [5]:
# Run both splits through extract_features with the shared conv_base; each call
# yields a (features, labels) pair that later cells flatten and use for the
# decision tree.
# NOTE(review): presumably this is a forward pass through the pretrained base —
# confirm in transfer_learning.py.
train_features, train_labels = extract_features(conv_base, X_train, Y_train)
test_features, test_labels = extract_features(conv_base, X_test, Y_test)

# Flatten To Fit Decision Tree

In [6]:
# Collapse every extracted feature map into a 1-D vector so each example becomes
# a single row of features; labels are taken from extract_features' output.
X_train_flatten = [feature_map.flatten() for feature_map in train_features]
X_test_flatten = [feature_map.flatten() for feature_map in test_features]
Y_train = train_labels
Y_test = test_labels

# Sanity check on the resulting design-matrix dimensions.
print(f'Number of training instances: {len(X_train_flatten)}')
print(f'Number of features: {len(X_train_flatten[0])}')

Number of training instances: 300
Number of features: 32768


# Train Decision Tree Model With K-Fold Cross Validation

In [13]:
import importlib
import run_algo_with_kfold
importlib.reload(run_algo_with_kfold)
from run_algo_with_kfold import kfold_cross_validation

# Number of cross-validation folds and the candidate tree depths to compare.
k = 5
depths = [5, 6, 7, 8, 9, 10]

# One kfold_cross_validation result per candidate depth, in the same order as
# `depths`.
clfs_and_accuracies = []
for depth in depths:
  hyperparams = {'depth': depth}
  clfs_and_accuracies.append(
    kfold_cross_validation(k, X_train_flatten, Y_train, 'decision_tree', hyperparams)
  )

Running 5-fold cross validation for decision_tree with {'depth': 5}
Split accuracy: 0.16666666666666666
Split accuracy: 0.21666666666666667
Split accuracy: 0.21666666666666667
Split accuracy: 0.15
Split accuracy: 0.35
Completed 5-fold cross validation for decision_tree with {'depth': 5}
Obtained average accuracy of: 0.22000000000000003

Running 5-fold cross validation for decision_tree with {'depth': 6}
Split accuracy: 0.18333333333333332
Split accuracy: 0.2
Split accuracy: 0.23333333333333334
Split accuracy: 0.21666666666666667
Split accuracy: 0.36666666666666664
Completed 5-fold cross validation for decision_tree with {'depth': 6}
Obtained average accuracy of: 0.24

Running 5-fold cross validation for decision_tree with {'depth': 7}
Split accuracy: 0.21666666666666667
Split accuracy: 0.21666666666666667
Split accuracy: 0.3
Split accuracy: 0.2
Split accuracy: 0.31666666666666665
Completed 5-fold cross validation for decision_tree with {'depth': 7}
Obtained average accuracy of: 0.25

R

In [14]:
# Dump the (classifier, accuracy) tuples collected by the cross-validation loop.
# NOTE(review): in the recorded run the stored accuracy matches the LAST fold's
# "Split accuracy" (e.g. 0.35 for depth 5), not the printed average (0.22) —
# confirm what kfold_cross_validation is meant to return.
print(clfs_and_accuracies)

[(DecisionTreeClassifier(max_depth=5), 0.35), (DecisionTreeClassifier(max_depth=6), 0.36666666666666664), (DecisionTreeClassifier(max_depth=7), 0.31666666666666665), (DecisionTreeClassifier(max_depth=8), 0.31666666666666665), (DecisionTreeClassifier(max_depth=9), 0.26666666666666666), (DecisionTreeClassifier(max_depth=10), 0.2833333333333333)]


In [32]:
import importlib
import run_algo_with_kfold
importlib.reload(run_algo_with_kfold)
from run_algo_with_kfold import get_precision_scores

In [33]:
# Score every cross-validated classifier on the held-out test features and
# print the metrics returned by get_precision_scores for each one.
for clf, accuracy in clfs_and_accuracies:
  test_scores = get_precision_scores(clf, X_test_flatten, Y_test)
  print(test_scores)

{'accuracy': 0.21333333333333335, 'macro_avg': 0.1785014005602241, 'f1_score_macro': 0.17951045342686212, 'micro_avg': 0.21333333333333335, 'f1_score_micro': 0.21333333333333337, 'roc_auc_score': 0.5489881224448876}
{'accuracy': 0.21333333333333335, 'macro_avg': 0.1799583132916466, 'f1_score_macro': 0.18412347886032093, 'micro_avg': 0.21333333333333335, 'f1_score_micro': 0.21333333333333337, 'roc_auc_score': 0.5486563005926786}
{'accuracy': 0.18666666666666668, 'macro_avg': 0.2193241943241943, 'f1_score_macro': 0.192584448956998, 'micro_avg': 0.18666666666666668, 'f1_score_micro': 0.18666666666666668, 'roc_auc_score': 0.5577652654453659}
{'accuracy': 0.14666666666666667, 'macro_avg': 0.138985088985089, 'f1_score_macro': 0.14059825747805288, 'micro_avg': 0.14666666666666667, 'f1_score_micro': 0.14666666666666667, 'roc_auc_score': 0.5179670214295322}
{'accuracy': 0.2, 'macro_avg': 0.19706589706589706, 'f1_score_macro': 0.18174498609281217, 'micro_avg': 0.2, 'f1_score_micro': 0.2000000000

In [1]:
import importlib
import run_algo_with_kfold
importlib.reload(run_algo_with_kfold)
from run_algo_with_kfold import get_roc_auc_curve

# Collect one ROC result per cross-validated classifier, in the same order as
# clfs_and_accuracies. This cell needs clfs_and_accuracies, the flattened
# features and the labels from the earlier cells to exist in the current kernel.
fprs = []
tprs = []
roc_aucs = []
for clf, _accuracy in clfs_and_accuracies:
  # Unpack the classifier from each (clf, accuracy) pair: previously `clf` was
  # never rebound in this loop, so every iteration used whatever `clf` an
  # earlier cell had left behind. The labels are named Y_train / Y_test in this
  # notebook (Python names are case-sensitive; y_train / y_test do not exist).
  fpr, tpr, roc_auc = get_roc_auc_curve(clf, X_train_flatten, Y_train, X_test_flatten, Y_test, {'is_svm': False})
  fprs.append(fpr)
  tprs.append(tpr)
  roc_aucs.append(roc_auc)

NameError: name 'clfs_and_accuracies' is not defined

In [None]:
import importlib
import run_algo_with_kfold
importlib.reload(run_algo_with_kfold)
from run_algo_with_kfold import visualize_roc_auc_curve
# The number of distinct classes does not change between iterations, so compute
# it once. The test labels are stored in Y_test; the lowercase y_test used here
# before is never assigned anywhere in the notebook.
n_classes = len(np.unique(Y_test))

# Draw one ROC figure per tree depth, pairing each depth with its curve data.
for depth, fpr, tpr, roc_auc in zip(depths, fprs, tprs, roc_aucs):
  title = f'ROC curve for depth = {str(depth)}'
  visualize_roc_auc_curve(title, fpr, tpr, roc_auc, n_classes)