## Import Libraries and Packages

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import cv2
from sklearn import datasets
from sklearn import metrics
from sklearn import model_selection
from sklearn import preprocessing
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, roc_auc_score
import tensorflow as tf
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.dataset import random_split
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torch.utils.data import Subset
import keras
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Concatenate, Dense, Dropout
from keras.models import Model
from keras.callbacks import EarlyStopping
from keras.regularizers import L2
from keras.metrics import Precision, Recall, AUC
from keras.utils import to_categorical
from keras.optimizers import Adam
import pickle
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation notices emitted by NumPy / Keras / scikit-learn.
warnings.filterwarnings('ignore')

## Import Data

In [None]:
from google.colab import drive
import os

# Mount Google Drive so the project data folder is reachable from Colab.
drive.mount('/content/drive')

# ADC data
data_path = '/content/drive/MyDrive/Computer Vision Project/Data/20230508'
file_names = os.listdir(data_path)

# Select the three pickles by name. os.listdir() returns entries in arbitrary
# order, so picking them by list position could silently swap the labels,
# the images and the patient index.
LABEL_FILE = '20230508_adc_annotated_label.pkl'
IMAGE_FILE = '20230508_adc_input_rgb_128_all.pkl'
INDEX_FILE = '20230508_patients_index_updated.pkl'
pkl_files = [LABEL_FILE, IMAGE_FILE, INDEX_FILE]

missing_files = [name for name in pkl_files if name not in file_names]
if missing_files:
  raise FileNotFoundError(f'Missing expected pickle file(s) in {data_path}: {missing_files}')

file_path_y = os.path.join(data_path, pkl_files[0])
file_path_x = os.path.join(data_path, pkl_files[1])
file_path_idx = os.path.join(data_path, pkl_files[2])

# NOTE(security): pickle.load can execute arbitrary code stored in the file;
# only load pickles that come from a trusted source.
with open(file_path_x, 'rb') as file:
  x = pickle.load(file)

with open(file_path_y, 'rb') as file:
  y = pickle.load(file)

with open(file_path_idx, 'rb') as file:
  idx = pickle.load(file)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Show the selected pickle files (position 0 = labels, 1 = images, 2 = patient index).
pkl_files

['20230508_adc_annotated_label.pkl',
 '20230508_adc_input_rgb_128_all.pkl',
 '20230508_patients_index_updated.pkl']

In [None]:
# Get a list of the keys
keys = list(idx.keys())

# Key located 70% of the way through the key list; the second entry of its
# idx value is used as the end of the training range.
# (Built-in int() replaces np.int, which was removed from NumPy.)
train_key = keys[int(0.7*len(keys))]
print('Last index in training = ', idx[train_key][1])

# Key located 85% of the way through the key list; end of the validation range.
val_key = keys[int(0.85*len(keys))]
print('Last index in validation = ', idx[val_key][1])

Last index in training =  19160
Last index in validation =  23234


## Split Training / Validation / Test

In [None]:
# Shape of the full image array loaded from the pickle.
print(x.shape)

(27342, 128, 128, 3)


In [None]:
# Get a list of the keys
keys = list(idx.keys())

# Keys sitting at the 70% and 85% positions of the key list. Each key maps to
# a two-element entry in idx; element [1] is used as the end of that key's range.
# (Built-in int() replaces np.int, which was removed from NumPy.)
train_key = keys[int(0.7*len(keys))]
val_key = keys[int(0.85*len(keys))]
print(train_key, val_key)

# Boundaries of the three splits along the first axis of x / y
train_end = idx[train_key][1]
val_end = idx[val_key][1]

# Split into training and validation and test
x_train, y_train = x[:train_end], y[:train_end]
x_val, y_val = x[train_end:val_end], y[train_end:val_end]
x_test, y_test = x[val_end:], y[val_end:]

# Print shapes
print('x_train shape = ', x_train.shape)
print('y_train shape = ', y_train.shape)

print('x_val shape = ', x_val.shape)
print('y_val shape = ', y_val.shape)

print('x_test shape = ', x_test.shape)
print('y_test shape = ', y_test.shape)

11035_1001055 11256_1001279
x_train shape =  (19160, 128, 128, 3)
y_train shape =  (19160,)
x_val shape =  (4074, 128, 128, 3)
y_val shape =  (4074,)
x_test shape =  (4108, 128, 128, 3)
y_test shape =  (4108,)


In [None]:
# Inspect the key that closes the validation split and the index pair stored for it.
print(val_key)
print(idx[val_key])

11256_1001279
[23215, 23234]


In [None]:
# Index pair stored for the last key in idx.
last_patient = keys[-1]
print(idx[last_patient])

[27314, 27342]


## Redefine Class Weights

In [None]:
import numpy as np
from sklearn.utils import class_weight
from collections import Counter

# Labels of the training split; the class weights below are derived from them.
labels = y_train

# Number of training samples (first dimension of x_train)
total_samples = x_train.shape[0]

def sample_per_class_counter(data):
  """Count how many samples belong to each class.

  Args:
    data: array-like of class labels.

  Returns:
    A list of Python ints, one count per distinct label, ordered like
    ``np.unique(data)`` (ascending label value). Empty input gives ``[]``.
  """
  # A single vectorised pass replaces the nested Python loops
  # (one full scan of the data per class).
  _, counts = np.unique(data, return_counts=True)
  return counts.tolist()

# Calculate class weights
# Per-sample balanced weights (kept because the name may be used elsewhere).
sample_weights = class_weight.compute_sample_weight(class_weight='balanced', y=labels)

# One balanced weight per class, ordered like np.unique(labels).
# Taking np.unique(sample_weights) instead would sort the weights by value,
# which only matches the class order when class 0 is the majority class, and
# would collapse classes that happen to have the same size.
class_labels = np.unique(labels)
class_weights = class_weight.compute_class_weight(class_weight='balanced', classes=class_labels, y=labels)

# Create a dictionary mapping class index to class weight
class_weights_dict = {i: float(weight) for i, weight in enumerate(class_weights)}

print(class_weights_dict)

# Inverse-frequency weights per class as a torch tensor: total / class count
samples_per_class = sample_per_class_counter(y_train)
weights_per_class = torch.tensor([total_samples / class_sample_size for class_sample_size in samples_per_class], dtype=torch.float32)

# Rescale so that the weights sum to the number of training samples
rescaling_factor = total_samples / torch.sum(weights_per_class)
weights_per_class *= rescaling_factor
weights_per_class

{0: 0.5177258971033291, 1: 14.603658536585366}


tensor([  655.9999, 18504.0000])

## Sanity Check for Distribution of Data

In [None]:
# Share of each class (in percent) inside every split: the per-class counts
# are divided by the number of labels in that split.
samples_per_class_train, samples_per_class_val, samples_per_class_test = (
    [100*class_count/split_labels.shape[0] for class_count in sample_per_class_counter(split_labels)]
    for split_labels in (y_train, y_val, y_test)
)

print('Train distribution = ', samples_per_class_train)
print('Validation distribution = ', samples_per_class_val)
print('Test distribution = ', samples_per_class_test)

Train distribution =  [96.57620041753654, 3.4237995824634657]
Validation distribution =  [94.96809032891507, 5.031909671084929]
Test distribution =  [96.22687439143135, 3.7731256085686464]


## VGG19 : Raw data / No re-weighting / Pre-trained (No fine-tuning)

In [None]:
# Create the VGG19 base model: ImageNet weights, convolutional part only.
# (`classes` / `classifier_activation` only apply when include_top=True and
# were left at their defaults, so they are omitted.)
vgg_base_model = keras.applications.VGG19(
    include_top=False,
    weights="imagenet",
)

# Create the input tensor
inputs = keras.Input(shape=x_train.shape[1:])

# Apply normalization using the `Normalization` layer
# NOTE(review): the layer is used without adapt() and without explicit
# mean/variance, so it presumably passes the pixels through unchanged.
# Confirm, and consider keras.applications.vgg19.preprocess_input instead.
normalized = keras.layers.Normalization()(inputs)

# Pass the normalized input through the VGG19 base model. The symbolic tensor
# gets its own name so it is not confused with the NumPy embeddings below.
vgg_features = vgg_base_model(normalized)

# Create the model with inputs and outputs
embed_model = keras.Model(inputs=inputs, outputs=vgg_features)

# Predict the embeddings of the training images
vgg_embedding_no_augment = embed_model.predict(x_train)

# Classification head trained on top of the frozen embeddings.
# Dedicated names (`head_input`, `hidden`) are used so that the image array
# `x` and the builtin `input` are not overwritten by Keras tensors.
head_input = keras.Input(shape=vgg_embedding_no_augment.shape[1:])
hidden = keras.layers.GlobalAveragePooling2D()(head_input)
hidden = keras.layers.Dense(256, activation='relu')(hidden)
hidden = keras.layers.Dropout(0.2)(hidden)
hidden = keras.layers.Dense(128, activation='relu')(hidden)
hidden = keras.layers.Dropout(0.3)(hidden)
output = keras.layers.Dense(1, activation="sigmoid", name="output")(hidden)

model = keras.Model(head_input, output, name='vgg19_raw_default-weights_pretrained')
# `learning_rate` replaces the deprecated `lr` argument of the optimizer.
model.compile(optimizer=Adam(learning_rate=0.00001), loss='binary_crossentropy',  metrics=[tf.keras.metrics.AUC()])
model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "vgg19_raw_default-weights_pretrained"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 4, 4, 512)]       0         
                                                                 
 global_average_pooling2d (G  (None, 512)              0         
 lobalAveragePooling2D)                                          
                                                                 
 dense (Dense)               (None, 256)               131328    
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 128)               32896     
                       

In [None]:
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',         # Metric to monitor
    patience=20,                # Number of epochs with no improvement after which training will be stopped
    restore_best_weights=True,  # Restore the weights of the best epoch
)

# NOTE(review): validation_split holds out a fraction of the training arrays
# themselves (per the Keras docs, the last samples before shuffling), so the
# separate x_val / y_val split is not what early stopping monitors here.
# Confirm this is intended.
history = model.fit(vgg_embedding_no_augment, y_train,
                    batch_size=32,
                    epochs=100,  # Increase the number of epochs to allow for early stopping
                    validation_split=0.2,
                    callbacks=[early_stopping])  # `callbacks` is documented as a list of callbacks

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

## Output Metrics for Training / Validation / Test

### Raw | Training

In [None]:
# Expects `vgg_embedding_no_augment` to hold the embeddings of x_train.
y_pred_probs = model.predict(vgg_embedding_no_augment)

def evaluate_thresholds(y_true, y_pred_probs, thresholds):
  """Score binary predictions obtained at several probability thresholds.

  Args:
    y_true: array-like of ground-truth binary labels.
    y_pred_probs: array-like of predicted probabilities for the positive
      class; an (n, 1) column is flattened to (n,).
    thresholds: iterable of cut-offs; a sample is predicted positive when its
      probability is >= the threshold.

  Returns:
    A DataFrame with one row per threshold and the columns Threshold,
    positive_count, F1, Precision, Recall, AUC and Accuracy.
  """
  y_true = np.asarray(y_true).ravel()
  probs = np.asarray(y_pred_probs).ravel()

  # ROC AUC is a ranking metric, so it is computed once from the
  # probabilities. Feeding it thresholded 0/1 labels yields a per-threshold
  # balanced accuracy instead of the AUC.
  auc = roc_auc_score(y_true, probs)

  results = []
  for threshold in thresholds:
    # Convert probabilities to binary predictions based on threshold
    y_pred = np.where(probs >= threshold, 1, 0)

    # Plain integer count; summing an (n, 1) array with the builtin sum()
    # produced one-element arrays in the result table.
    positive_count = int(y_pred.sum())

    # Calculate evaluation metrics (undefined ratios are reported as 0)
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, zero_division=0)
    recall = recall_score(y_true, y_pred, zero_division=0)
    f1 = f1_score(y_true, y_pred, zero_division=0)

    results.append((threshold, positive_count, f1, precision, recall, auc, accuracy))

  results_df = pd.DataFrame(results, columns=["Threshold","positive_count" ,"F1", "Precision", "Recall", "AUC", "Accuracy"])
  return results_df

results_df = evaluate_thresholds(y_train, y_pred_probs, np.arange(0, 1, 0.05))
results_df



Unnamed: 0,Threshold,positive_count,F1,Precision,Recall,AUC,Accuracy
0,0.0,[19160],0.066209,0.034238,1.0,0.5,0.034238
1,0.05,[5012],0.200423,0.113328,0.865854,0.812845,0.763466
2,0.1,[3165],0.264852,0.159874,0.771341,0.813821,0.853392
3,0.15,[2074],0.32967,0.216972,0.685976,0.799105,0.904489
4,0.2,[1302],0.371808,0.27957,0.554878,0.752093,0.935804
5,0.25,[792],0.404696,0.369949,0.446646,0.70984,0.95501
6,0.3,[464],0.407143,0.491379,0.347561,0.667403,0.965344
7,0.35,[269],0.378378,0.650558,0.266768,0.630844,0.96999
8,0.4,[171],0.331318,0.80117,0.208841,0.603502,0.971138
9,0.45,[127],0.286079,0.88189,0.170732,0.584961,0.970825


### Raw | Validation

In [None]:
# Embed the validation images under their own name so that the training
# embeddings stored in `vgg_embedding_no_augment` are not overwritten
# (re-running the training-metrics cell would otherwise score the wrong data).
vgg_embedding_val = embed_model.predict(x_val)
y_pred_probs = model.predict(vgg_embedding_val)

results_df = evaluate_thresholds(y_val, y_pred_probs, np.arange(0, 1, 0.05))
results_df



Unnamed: 0,Threshold,positive_count,F1,Precision,Recall,AUC,Accuracy
0,0.0,[4074],0.095817,0.050319,1.0,0.5,0.050319
1,0.05,[1004],0.180314,0.108566,0.531707,0.650191,0.75675
2,0.1,[618],0.18712,0.124595,0.37561,0.61789,0.835788
3,0.15,[400],0.201653,0.1525,0.297561,0.604971,0.881443
4,0.2,[228],0.133949,0.127193,0.141463,0.545014,0.907953
5,0.25,[115],0.075,0.104348,0.058537,0.515957,0.927344
6,0.3,[59],0.060606,0.135593,0.039024,0.512921,0.939126
7,0.35,[32],0.025316,0.09375,0.014634,0.503569,0.943299
8,0.4,[17],0.027027,0.176471,0.014634,0.505508,0.946981
9,0.45,[10],0.027907,0.3,0.014634,0.506412,0.948699


### Raw | Test

In [None]:
# Embed the test images under their own name so that the training
# embeddings stored in `vgg_embedding_no_augment` are not overwritten
# (re-running the training-metrics cell would otherwise score the wrong data).
vgg_embedding_test = embed_model.predict(x_test)
y_pred_probs = model.predict(vgg_embedding_test)

results_df = evaluate_thresholds(y_test, y_pred_probs, np.arange(0, 1, 0.05))
results_df



Unnamed: 0,Threshold,positive_count,F1,Precision,Recall,AUC,Accuracy
0,0.0,[4108],0.072719,0.037731,1.0,0.5,0.037731
1,0.05,[824],0.143003,0.084951,0.451613,0.630436,0.795764
2,0.1,[505],0.169697,0.110891,0.36129,0.623853,0.866602
3,0.15,[308],0.168467,0.126623,0.251613,0.591782,0.90628
4,0.2,[190],0.168116,0.152632,0.187097,0.573184,0.930136
5,0.25,[103],0.131783,0.165049,0.109677,0.543961,0.945472
6,0.3,[58],0.075117,0.137931,0.051613,0.519482,0.952045
7,0.35,[36],0.052356,0.138889,0.032258,0.512208,0.95594
8,0.4,[13],0.035714,0.230769,0.019355,0.508413,0.960565
9,0.45,[7],0.012346,0.142857,0.006452,0.502467,0.961052


## Map Raw to Patient

In [None]:
def output_mapper(model, dataset, idx, x, chosen_threshold, y_true, embedder=None, verbose=False):
  """Collapse image-level labels and predictions into one label per patient.

  A patient is labelled 1 when at least one of its images is 1, for the
  ground truth and for the thresholded predictions alike.

  Args:
    model: object with ``predict``; maps embeddings to one probability per image.
    dataset: ``'train'``, ``'val'`` or ``'test'``. Selects the patient keys
      using the 70% / 85% positions of the key list.
    idx: dict mapping patient key -> ``[start_img_idx, end_img_idx]``, indexed
      relative to the FULL dataset; the end index is used as a slice end.
    x: images of the chosen split only (e.g. ``x_val`` for ``'val'``).
    chosen_threshold: an image is predicted positive when its probability is
      >= this value.
    y_true: labels of the FULL dataset (sliced with the absolute indices).
    embedder: object with ``predict`` that turns images into embeddings.
      Defaults to the notebook-level ``embed_model``.
    verbose: when True, print the selected keys and the index ranges used.

  Returns:
    ``(true_y_mapped, pred_y_mapped)``: two lists of 0/1 ints with one entry
    per patient that has at least one image in both ``y_true`` and ``x``.

  Raises:
    ValueError: if ``dataset`` is not one of the three supported names.
  """
  keys = list(idx.keys())

  # Positions of the last training / validation patient in the key list.
  # (Built-in int() replaces np.int, which was removed from NumPy.)
  train_end_pos = int(0.7*len(keys))
  val_end_pos = int(0.85*len(keys))

  # Indexing referenced relative to the FULL dataset (i.e., index 0 to 27k)
  if dataset == 'train':
    sliced_keys = keys[: train_end_pos+1]
  elif dataset == 'val':
    sliced_keys = keys[train_end_pos+1 : val_end_pos+1]
  elif dataset == 'test':
    sliced_keys = keys[val_end_pos+1 :]
  else:
    raise ValueError(f"dataset must be 'train', 'val' or 'test', got {dataset!r}")

  if verbose:
    print(sliced_keys)

  if embedder is None:
    embedder = embed_model  # notebook-level embedding model

  vgg_embedding = embedder.predict(x)
  y_pred_probs = np.asarray(model.predict(vgg_embedding))
  y_pred = np.where(y_pred_probs >= chosen_threshold, 1, 0)

  # Predictions are relative to the start of the split, whereas idx holds
  # absolute positions: shift val / test ranges by the first patient's start.
  if dataset == 'train' or not sliced_keys:
    offset = 0
  else:
    offset = idx[sliced_keys[0]][0]

  true_y_mapped = []
  pred_y_mapped = []
  skipped_keys = []

  for key in sliced_keys:
    # Extract indexing for start and end image per patient
    start_img_idx = idx[key][0]
    end_img_idx = idx[key][1]

    # Slice y true and predicted
    sliced_y = np.asarray(y_true[start_img_idx : end_img_idx])
    sliced_y_pred = y_pred[start_img_idx - offset : end_img_idx - offset]

    if verbose:
      print(start_img_idx, end_img_idx)
      if offset:
        print(start_img_idx - offset, end_img_idx - offset)

    # A patient whose range lies outside the arrays has no images to score;
    # counting it as a correct negative would inflate the metrics.
    if sliced_y.size == 0 or sliced_y_pred.size == 0:
      skipped_keys.append(key)
      continue

    # Store in lists
    true_y_mapped.append(1 if (sliced_y == 1).any() else 0)
    pred_y_mapped.append(1 if (sliced_y_pred == 1).any() else 0)

  if skipped_keys:
    print(f"output_mapper: skipped {len(skipped_keys)} patient(s) without images in range for '{dataset}'")

  return true_y_mapped, pred_y_mapped

In [None]:
# Number of keys in idx.
len(keys)

1233

In [None]:
# Patient-level labels/predictions for the training split at threshold 0.05.
# The full label array `y` is passed because output_mapper slices it with the indices stored in idx.
true_y_train_mapped, pred_y_train_mapped = output_mapper(model, 'train', idx, x_train, 0.05, y)
print(len(true_y_train_mapped), len(pred_y_train_mapped))

0 31
31 52
52 74
74 97
97 118
118 137
137 158
158 177
177 204
204 223
223 239
239 261
261 288
288 307
307 326
326 354
354 373
373 394
394 413
413 440
440 461
461 480
480 499
499 518
518 538
538 557
557 578
578 597
597 616
616 635
635 654
654 670
670 689
689 710
710 729
729 750
750 777
777 799
799 822
822 842
842 871
871 890
890 913
913 938
938 959
959 984
984 1005
1005 1028
1028 1047
1047 1074
1074 1095
1095 1116
1116 1142
1142 1165
1165 1192
1192 1215
1215 1242
1242 1269
1269 1288
1288 1307
1307 1328
1328 1351
1351 1374
1374 1401
1401 1420
1420 1440
1440 1459
1459 1478
1478 1498
1498 1522
1522 1545
1545 1564
1564 1583
1583 1604
1604 1631
1631 1650
1650 1671
1671 1696
1696 1717
1717 1744
1744 1767
1767 1784
1784 1804
1804 1831
1831 1850
1850 1866
1866 1885
1885 1908
1908 1933
1933 1960
1960 1987
1987 2014
2014 2035
2035 2058
2058 2085
2085 2112
2112 2135
2135 2162
2162 2183
2183 2199
2199 2218
2218 2239
2239 2261
2261 2277
2277 2296
2296 2315
2315 2336
2336 2355
2355 2378
2378 2399
239

In [None]:
# Patient-level labels/predictions for the validation split at threshold 0.05.
# The full label array `y` is passed because output_mapper slices it with the indices stored in idx.
true_y_val_mapped, pred_y_val_mapped = output_mapper(model, 'val', idx, x_val, 0.05, y)
print(len(true_y_val_mapped), len(pred_y_val_mapped))

['11036_1001056', '11038_1001058', '11039_1001059', '11040_1001060', '11041_1001061', '11042_1001062', '11044_1001064', '11046_1001066', '11047_1001067', '11048_1001068', '11049_1001069', '11050_1001070', '11051_1001071', '11053_1001073', '11054_1001074', '11056_1001077', '11057_1001078', '11058_1001079', '11059_1001080', '11059_1001081', '11060_1001082', '11061_1001083', '11062_1001084', '11063_1001085', '11064_1001086', '11065_1001087', '11066_1001088', '11068_1001090', '11069_1001091', '11070_1001092', '11071_1001093', '11073_1001095', '11075_1001097', '11076_1001098', '11077_1001099', '11078_1001100', '11079_1001101', '11082_1001104', '11084_1001106', '11085_1001107', '11086_1001108', '11087_1001109', '11087_1001110', '11088_1001111', '11089_1001112', '11090_1001113', '11091_1001114', '11092_1001115', '11094_1001117', '11096_1001119', '11097_1001120', '11098_1001121', '11099_1001122', '11100_1001123', '11101_1001124', '11102_1001125', '11103_1001126', '11104_1001127', '11105_100112

In [None]:
# Patient-level labels/predictions for the test split at threshold 0.05.
# The full label array `y` is passed because output_mapper slices it with the indices stored in idx.
true_y_test_mapped, pred_y_test_mapped = output_mapper(model, 'test', idx, x_test, 0.05, y)
print(len(true_y_test_mapped), len(pred_y_test_mapped))

23234 23255
0 21
23255 23274
21 40
23274 23301
40 67
23301 23336
67 102
23336 23357
102 123
23357 23385
123 151
23385 23406
151 172
23406 23425
172 191
23425 23444
191 210
23444 23471
210 237
23471 23490
237 256
23490 23513
256 279
23513 23540
279 306
23540 23559
306 325
23559 23582
325 348
23582 23605
348 371
23605 23635
371 401
23635 23655
401 421
23655 23680
421 446
23680 23709
446 475
23709 23728
475 494
23728 23755
494 521
23755 23786
521 552
23786 23805
552 571
23805 23824
571 590
23824 23851
590 617
23851 23870
617 636
23870 23891
636 657
23891 23912
657 678
23912 23939
678 705
23939 23964
705 730
23964 23985
730 751
23985 24006
751 772
24006 24035
772 801
24035 24058
801 824
24058 24091
824 857
24091 24107
857 873
24107 24130
873 896
24130 24146
896 912
24146 24177
912 943
24177 24204
943 970
24204 24223
970 989
24223 24242
989 1008
24242 24261
1008 1027
24261 24280
1027 1046
24280 24304
1046 1070
24304 24330
1070 1096
24330 24353
1096 1119
24353 24372
1119 1138
24372 24399
113

In [None]:
def _patient_level_scores(split_name, y_true_mapped, y_pred_mapped):
  """Compute and print the patient-level metrics of one split.

  Args:
    split_name: label printed in the banner (e.g. "TRAIN").
    y_true_mapped: list of 0/1 ground-truth labels, one per patient.
    y_pred_mapped: list of 0/1 predicted labels, one per patient.

  Returns:
    [f1, precision, recall, auc, accuracy] in that order.
  """
  f1 = f1_score(y_true_mapped, y_pred_mapped)
  precision = precision_score(y_true_mapped, y_pred_mapped)
  recall = recall_score(y_true_mapped, y_pred_mapped)
  # NOTE(review): only hard 0/1 predictions are available at patient level,
  # so this "AUC" is computed from labels rather than scores.
  auc = roc_auc_score(y_true_mapped, y_pred_mapped)
  accuracy = accuracy_score(y_true_mapped, y_pred_mapped)

  print(f"----- {split_name} -----")
  print(f"F1 Score: {f1:.4f}")
  print(f"Precision: {precision:.4f}")
  print(f"Recall: {recall:.4f}")
  print(f"AUC: {auc:.4f}")
  print(f"Accuracy: {accuracy:.4f}")
  print("\n")

  return [f1, precision, recall, auc, accuracy]

train_scores = _patient_level_scores("TRAIN", true_y_train_mapped, pred_y_train_mapped)
val_scores = _patient_level_scores("VALIDATION", true_y_val_mapped, pred_y_val_mapped)
test_scores = _patient_level_scores("TEST", true_y_test_mapped, pred_y_test_mapped)

# Individual names kept for any later cell that refers to them
f1_train, precision_train, recall_train, auc_train, accuracy_train = train_scores
f1_val, precision_val, recall_val, auc_val, accuracy_val = val_scores
f1_test, precision_test, recall_test, auc_test, accuracy_test = test_scores

# Row labels of the summary table. A dedicated name is used so the
# `sklearn.metrics` module imported as `metrics` is not overwritten.
metric_names = ["F1 Score", "Precision", "Recall", "AUC", "Accuracy"]

# Create the dataframe
data = {"Train": train_scores, "Validation": val_scores, "Test": test_scores}
df = pd.DataFrame(data, index=metric_names)

# Show one row per split
df.transpose()

----- TRAIN -----
F1 Score: 0.2891
Precision: 0.1692
Recall: 0.9911
AUC: 0.6332
Accuracy: 0.3681


----- VALIDATION -----
F1 Score: 0.3771
Precision: 0.2340
Recall: 0.9706
AUC: 0.6277
Accuracy: 0.4108


----- TEST -----
F1 Score: 0.3270
Precision: 0.2016
Recall: 0.8667
AUC: 0.5989
Accuracy: 0.4185




Unnamed: 0,F1 Score,Precision,Recall,AUC,Accuracy
Train,0.289062,0.169207,0.991071,0.633169,0.368056
Validation,0.377143,0.234043,0.970588,0.627678,0.410811
Test,0.327044,0.20155,0.866667,0.598918,0.418478


In [None]:
# Inspect the keys that make up the test split and the index pair of the last one.
# (Built-in int() replaces np.int, which was removed from NumPy.)
start_key_pos = int(0.85*len(keys))+1
end_key_pos = len(keys)
print('Starting key position =', start_key_pos, '| End key position =', end_key_pos)
sliced_keys = keys[start_key_pos : end_key_pos+1]
print('Patient IDs =', sliced_keys)

# Last key of the slice
i = -1
start_img_idx = idx[sliced_keys[i]][0]
end_img_idx = idx[sliced_keys[i]][1]
print('Start image index =', start_img_idx, '| End image index =', end_img_idx)

Starting key position = 1049 | End key position = 1233
Patient IDs = ['11257_1001280', '11259_1001282', '11260_1001283', '11261_1001284', '11262_1001285', '11263_1001286', '11264_1001287', '11265_1001288', '11267_1001290', '11268_1001291', '11269_1001292', '11270_1001293', '11271_1001294', '11272_1001295', '11273_1001296', '11275_1001298', '11276_1001299', '11278_1001301', '11279_1001302', '11280_1001303', '11281_1001304', '11282_1001305', '11284_1001307', '11286_1001309', '11287_1001310', '11288_1001311', '11289_1001312', '11290_1001313', '11291_1001314', '11292_1001315', '11293_1001316', '11294_1001317', '11295_1001318', '11297_1001320', '11298_1001321', '11299_1001322', '11301_1001324', '11303_1001326', '11304_1001327', '11305_1001328', '11306_1001329', '11307_1001330', '11308_1001331', '11309_1001332', '11310_1001333', '11311_1001334', '11312_1001335', '11314_1001337', '11315_1001338', '11316_1001339', '11317_1001340', '11318_1001341', '11319_1001342', '11320_1001343', '11321_10013

In [None]:
# Index pair stored in idx for key '11475_1001499'.
idx['11475_1001499']

[29412, 29431]

In [None]:
# Rebuild the key list from idx and show its last key.
keys = list(idx.keys())
keys[-1]

'11475_1001499'

In [None]:
# Shape of x, for comparison with the index pairs stored in idx.
x.shape

(27342, 128, 128, 3)

In [None]:
# Last column of the idx table, kept as a one-column DataFrame.
pd.DataFrame(idx).iloc[:, [-1]]

Unnamed: 0,11475_1001499
0,29412
1,29431


In [None]:
# Find the first key whose stored end index equals a given value.
# A dedicated name is used so the image array `x` is not overwritten.
target_end_idx = 27340  # Value to search for

# None when no key ends at the requested index
found_key = next((key for key, value in idx.items() if value[1] == target_end_idx), None)

print(found_key)

11377_1001400


In [None]:
# Index pair stored in idx for key '11377_1001400'.
idx['11377_1001400']

[27313, 27340]