In [2]:
import os
import random
from joblib import dump, load
from functools import reduce
from pathlib import Path
from scipy.stats import mode

import tensorflow as tf
from matplotlib import pyplot as plt
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

from pytorch_metric_learning import distances, losses, reducers, testers
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

from sklearn.model_selection import GridSearchCV

import datetime
# Date stamp (YYYY-MM-DD) used to version the output directory and artefact filenames.
now = datetime.datetime.now()
timestamp = f"{now:%Y-%m-%d}"

In [4]:
# Global experiment configuration: seeds, paths and training hyper-parameters.
SEED = 42
EXP_NAME = 'v1-ensble'
# The seeding functions must be *called*; assigning to them replaces the function
# objects and leaves every generator unseeded.
np.random.seed(SEED)
random.seed(SEED)
# torch drives the train/test permutation and the weight initialisation.
torch.manual_seed(SEED)
tf.random.set_seed(SEED)
DATA_DIR = Path('../data/all-patches/')
MODEL_OUT_DIR = Path(f'models/{EXP_NAME}_{timestamp}')
# MODEL_OUT_DIR.mkdir(parents=True, exist_ok=False)
TRAIN_SIZE = .7          # fraction of the dataset used for training
BATCH_SIZE = 128
LEARNING_RATE = 0.01
EPOCHS = 10
EMBEDDING_SIZE = 128     # output dimension of the embedding network
NUM_CLASSES = 2
IMG_SIZE = (1, 96, 96)   # (channels, height, width) fed to the network

In [4]:
# Enable memory growth on every GPU TensorFlow can see.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

### Removing complete white patches generated for negative class

In [5]:
# no_leish_pt = DATA_DIR.joinpath('no-leish')

# count_rm_img, count_imgs = 0, 0
# for path in no_leish_pt.glob('*.png'):
#     count_imgs += 1
#     with Image.open(path) as img:
#         img = np.asarray(img)
#         size = reduce((lambda x,y: x*y), img.shape)
#         if np.count_nonzero(img == 255) > 0.5 * size:
#             os.remove(path)
#             # print(f'{path} removed')
#             count_rm_img+=1
# print(f'Removed {count_rm_img}. Total = {count_imgs-count_rm_img}')

### Data augmentation

Positive class. Current proportion: 197 - 2803

In [6]:
# datagen = tf.keras.preprocessing.image.ImageDataGenerator(
#     rotation_range=120,
#     horizontal_flip=True,
#     vertical_flip=True,
#     zoom_range=0.3,
#     preprocessing_function=lambda x: x/255
# )

# def generate_data(input_imgs, out_folder):
#     for i in input_imgs:
#         image = np.expand_dims(plt.imread(i), 0)
#         datagen.fit(image)
#         # './semana18/all_patches_splitted/val/leish/'
#         for x, val in zip(datagen.flow(image,
#             save_to_dir=out_folder,
#             save_prefix='aug_',
#             save_format='png'),range(10)):
            # pass

In [7]:
# leish_pt = DATA_DIR.joinpath('leish').glob('*.png')
# out_aug_leish_pt = DATA_DIR.joinpath('aug') # visualize image quality before merging with real images

# out_aug_leish_pt.mkdir(parents=True, exist_ok=False)
# generate_data(leish_pt, out_aug_leish_pt)

Final proportion: 2139 - 2803

### Load data and split

In [8]:
# transform = transforms.Compose([
#     transforms.Grayscale(num_output_channels=1),
#     transforms.Resize((96, 96)),
#     transforms.ToTensor(),  # converte a imagem para torch.Tensor e normaliza [0.0,1.0]
# ])
# dataset = datasets.ImageFolder(root=DATA_DIR, transform=transform)
# dataset

Dataset ImageFolder
    Number of datapoints: 4942
    Root location: ..\data\all-patches
    StandardTransform
Transform: Compose(
               Grayscale(num_output_channels=1)
               Resize(size=(96, 96), interpolation=bilinear, max_size=None, antialias=warn)
               ToTensor()
           )

In [16]:
# train_size = int(TRAIN_SIZE * len(dataset))
# test_size = len(dataset) - train_size
# train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

# train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [5]:
def read_data(data_path, seed=None):
    """Load an image-folder dataset, split it into train/test subsets and build loaders.

    Images are converted to single-channel, resized to 96x96 and turned into tensors.
    A fraction ``TRAIN_SIZE`` of a random permutation of the samples goes to the
    training subset and the remainder to the test subset.

    Args:
        data_path: Root directory passed to ``datasets.ImageFolder``.
        seed: Optional integer. When given, the permutation is drawn from a dedicated
            generator seeded with it, so the split is reproducible. When ``None``
            (default) the global torch RNG is used, as before.

    Returns:
        Tuple ``(train_dataset, test_dataset, train_loader, test_loader,
        train_true_labels, test_true_labels)`` where the label lists follow the
        order of the corresponding subset.
    """
    transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize((96, 96)),
        transforms.ToTensor(),
    ])
    dataset = datasets.ImageFolder(root=data_path, transform=transform)

    train_size = int(TRAIN_SIZE * len(dataset))
    generator = None
    if seed is not None:
        generator = torch.Generator().manual_seed(seed)
    indices = torch.randperm(len(dataset), generator=generator).tolist()
    train_indices, test_indices = indices[:train_size], indices[train_size:]
    train_dataset = torch.utils.data.Subset(dataset, train_indices)
    test_dataset = torch.utils.data.Subset(dataset, test_indices)

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

    # True labels of each subset, read from the dataset's target list (no image decoding).
    train_true_labels = [dataset.targets[idx] for idx in train_indices]
    test_true_labels = [dataset.targets[idx] for idx in test_indices]

    # Count class-index-0 samples from the label lists; iterating over the subsets
    # would load and transform every image just to read its label.
    leish_train = sum(label == 0 for label in train_true_labels)
    leish_test = sum(label == 0 for label in test_true_labels)

    print(f'label format = {dataset.class_to_idx}')
    print(f'train test split proportion = train[{len(train_dataset)}], test[{len(test_dataset)}]')
    print(f'leish in training set = {leish_train}')
    print(f'leish in testing set = {leish_test}')

    return train_dataset, test_dataset, train_loader, test_loader, train_true_labels, test_true_labels

In [6]:
# Build the train/test subsets, their loaders and the per-subset label lists from DATA_DIR.
train_dataset, test_dataset, train_loader, test_loader, train_true_labels, test_true_labels = read_data(DATA_DIR)

label format = {'leish': 0, 'no-leish': 1}
train test split proportion = train[3459], test[1483]
leish in training set = 1484
leish in testing set = 655


In [79]:
# leish_train = sum(label == 0 for _, label in train_dataset)
# leish_test = sum(label == 0 for _, label in test_dataset)

# print(f'label format = {dataset.class_to_idx}')
# print(f'train test split proportion = train[{len(train_dataset)}], test[{len(test_dataset)}]')
# print(f'leish in training set = {leish_train}')
# print(f'leish in testing set = {leish_test}')

label format = {'leish': 0, 'no-leish': 1}
train test split proportion = train[3459], test[1483]
leish in training set = 1523
leish in testing set = 616


### Model

In [7]:
class Net(nn.Module):
    """Small convolutional network mapping an image batch to embedding vectors.

    Pipeline: conv(1->32, 3x3) -> ReLU -> conv(32->64, 3x3) -> ReLU -> 2x2 max-pool
    -> dropout -> flatten -> linear layer producing ``embedding_size`` features.
    """

    def __init__(self, embedding_size):
        """Build the layers; the linear layer's input width is derived from ``IMG_SIZE``."""
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout2d(0.25)
        # Not used by forward(); kept so code referencing the attribute keeps working.
        self.dropout2 = nn.Dropout2d(0.5)
        self.fc_input_size = self.calculate_fc_input_size(IMG_SIZE)
        self.fc1 = nn.Linear(self.fc_input_size, embedding_size)

    def _conv_features(self, x):
        """Apply the convolutional stack and pooling (everything before dropout)."""
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        return F.max_pool2d(x, 2)

    def forward(self, x):
        """Return the embedding for a batch ``x`` of images."""
        x = self._conv_features(x)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        return self.fc1(x)

    def calculate_fc_input_size(self, input_size):
        """Return the flattened feature count produced for one sample of ``input_size``.

        ``input_size`` is the per-sample shape without the batch dimension. The probe
        uses a zero tensor under ``no_grad`` and skips dropout (which never changes
        shapes), so it neither records gradients nor consumes random numbers.
        """
        with torch.no_grad():
            probe = torch.zeros(1, *input_size)
            return torch.flatten(self._conv_features(probe), 1).size(1)

In [8]:
def train(model, loss_func, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``.

    Each batch is moved to ``device``, embedded by ``model`` and scored by
    ``loss_func(embeddings, labels)``; the optimizer then takes one step.
    The loss is printed for every 100th batch (starting with batch 0).

    Returns:
        The loss tensor of the last processed batch.
    """
    model.train()
    for step, (inputs, targets) in enumerate(train_loader):
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        loss = loss_func(model(inputs), targets)
        loss.backward()
        optimizer.step()

        # Only report on every 100th iteration.
        if step % 100 != 0:
            continue
        print("Epoch {} Iteration {}: Loss = {}".format(epoch, step, loss))

    return loss

def get_all_embeddings(dataset, model):
    """Return whatever ``testers.BaseTester().get_all_embeddings`` yields for ``dataset`` and ``model``.

    Callers in this notebook unpack the result as ``(embeddings, labels)``.
    """
    return testers.BaseTester().get_all_embeddings(dataset, model)

def test(train_set, test_set, model):
    """Grid-search the classifier hyper-parameters and predict the test set.

    Embeddings of both sets are standardised, projected with PCA and truncated to
    the leading components whose cumulative explained variance first reaches 90%.
    An RBF SVM is then tuned with ``GridSearchCV`` (scoring: macro recall) on the
    training features, and the refitted best estimator predicts the test features.

    Returns:
        Tuple ``(test_embeddings, test_labels, predicts)``: the raw test embeddings,
        the test labels with their second dimension squeezed, and the predictions.
    """
    print('Embedding Train')
    train_embeddings, train_labels = get_all_embeddings(train_set, model)
    print('Embedding Test')
    test_embeddings, test_labels = get_all_embeddings(test_set, model)
    train_labels = train_labels.squeeze(1)
    test_labels = test_labels.squeeze(1)
    # The message still says KNN, but the classifier tuned below is an SVM.
    print("Computing KNN")

    clf = svm.SVC(kernel='rbf', C=1000, gamma=1)
    # Hyper-parameter grid explored by the search.
    params = {
        'C': [10, 100, 1000],
        'gamma': [1, 0.1, 0.01, 0.001],
    }
    search = GridSearchCV(clf, params, verbose=3, scoring='recall_macro')

    # Standardise with statistics from the training set only.
    ss = StandardScaler()
    x_scaled = ss.fit_transform(train_embeddings.cpu().numpy())
    x_test_scaled = ss.transform(test_embeddings.cpu().numpy())

    pca = PCA()
    Xt = pca.fit_transform(x_scaled)
    Xtest = pca.transform(x_test_scaled)

    # Number of leading components needed for the cumulative explained variance
    # (in percent) to reach 90; computed without rebinding `pca`.
    cumulative = np.cumsum(pca.explained_variance_ratio_ * 100)
    p = min(int(np.searchsorted(cumulative, 90, side='left')) + 1, len(cumulative))

    print(f'dimensoes pca: {p}')
    x_train = Xt[:, :p]
    x_test = Xtest[:, :p]
    print(x_train.shape)
    print(x_test.shape)

    search.fit(x_train, train_labels.cpu().numpy())
    print(search.best_params_)
    predicts = search.predict(x_test)

    return test_embeddings, test_labels, predicts

### Training

In [9]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [105]:
# Metric-learning losses that can be selected for training the embedding network.
loss = ['Triplet', 'NPairs', 'CosFace', 'MultiSimilarity']
loss_select = loss[3]

if loss_select == 'Triplet':
    distance = distances.CosineSimilarity()
    reducer = reducers.ThresholdReducer(low=0)
    loss_func = losses.TripletMarginLoss(margin=0.2, distance=distance, reducer=reducer)
elif loss_select == 'NPairs':
    loss_func = losses.NPairsLoss()
elif loss_select == 'CosFace':
    # NOTE(review): this option is labelled 'CosFace' but instantiates CircleLoss;
    # confirm which loss is intended.
    loss_func = losses.CircleLoss()
elif loss_select == 'MultiSimilarity':
    loss_func = losses.MultiSimilarityLoss(alpha = 2, beta = 50, base=0.5)

# Fresh embedding network and optimizer for the selected loss.
model = Net(EMBEDDING_SIZE).to(device)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

for epoch in range(1, EPOCHS + 1):
    lss = train(model, loss_func, device, train_loader, optimizer, epoch)

Epoch 1 Iteration 0: Loss = 2.331930637359619
Epoch 2 Iteration 0: Loss = 2.1792194843292236
Epoch 3 Iteration 0: Loss = 2.1913743019104004
Epoch 4 Iteration 0: Loss = 2.1849164962768555
Epoch 5 Iteration 0: Loss = 2.1862926483154297
Epoch 6 Iteration 0: Loss = 2.1997342109680176
Epoch 7 Iteration 0: Loss = 2.1896414756774902
Epoch 8 Iteration 0: Loss = 2.1848902702331543
Epoch 9 Iteration 0: Loss = 2.1934726238250732
Epoch 10 Iteration 0: Loss = 2.1812148094177246


In [162]:
# Tune the SVM on the training embeddings and predict the test split.
test_embeddings_real, test_labels_real, y_pred = test(train_dataset, test_dataset, model)

Embedding Train


  0%|          | 0/109 [00:03<?, ?it/s]


KeyboardInterrupt: 

In [109]:
# Report per-class metrics for the predictions held in `y_pred`.
# NOTE(review): `y_pred` is not defined in this cell; it must already exist in the
# kernel (e.g. from a completed `test(...)` call) — confirm on a fresh run.
tester = testers.BaseTester()
test_embeddings, test_labels =  tester.get_all_embeddings(test_dataset, model)
y_test_int = test_labels.squeeze(1).cpu()
print('LOSS ======== ', loss_select)
print(classification_report(y_test_int, y_pred))

100%|██████████| 47/47 [00:08<00:00,  5.39it/s]

              precision    recall  f1-score   support

           0       0.59      0.76      0.66       616
           1       0.79      0.62      0.70       867

    accuracy                           0.68      1483
   macro avg       0.69      0.69      0.68      1483
weighted avg       0.70      0.68      0.68      1483






In [50]:
def train_and_evaluate(train_set, test_set, model, C, gamma, to_save=False):
    """Fit scaler -> PCA -> RBF SVM on the training embeddings and predict the test set.

    Args:
        train_set: Dataset used to fit the scaler, PCA and SVM.
        test_set: Dataset to predict.
        model: Embedding network passed to ``get_all_embeddings``.
        C: ``C`` parameter of the SVM.
        gamma: ``gamma`` parameter of the SVM.
        to_save: When true, dump the fitted classifier, scaler and PCA into
            ``MODEL_OUT_DIR`` (created if missing).

    Returns:
        Tuple ``(test_labels, predictions)`` with the flattened true test labels
        and the SVM predictions.
    """
    print('Computing embeddings...')
    train_embeddings, train_labels = get_all_embeddings(train_set, model)
    test_embeddings, test_labels = get_all_embeddings(test_set, model)

    train_labels = train_labels.cpu().numpy().ravel()
    test_labels = test_labels.cpu().numpy().ravel()

    # Standardise using statistics from the training embeddings only.
    scaler = StandardScaler()
    train_embeddings = scaler.fit_transform(train_embeddings.cpu().numpy())
    test_embeddings = scaler.transform(test_embeddings.cpu().numpy())

    # A float n_components asks PCA to keep enough components for that variance fraction.
    pca = PCA(n_components=0.9)
    train_embeddings = pca.fit_transform(train_embeddings)
    test_embeddings = pca.transform(test_embeddings)

    clf = svm.SVC(C=C, gamma=gamma, kernel='rbf')
    clf.fit(train_embeddings, train_labels)

    predictions = clf.predict(test_embeddings)

    if to_save:
        # Save the classifier and its pre-processors; make sure the target
        # directory exists first, otherwise dump() fails on a fresh run.
        MODEL_OUT_DIR.mkdir(parents=True, exist_ok=True)
        dump(clf, MODEL_OUT_DIR.joinpath(f'clf_{loss_select}_{EXP_NAME}_{timestamp}.joblib'))
        dump(scaler, MODEL_OUT_DIR.joinpath(f'scaler_{loss_select}_{EXP_NAME}_{timestamp}.joblib'))
        dump(pca, MODEL_OUT_DIR.joinpath(f'pca_{loss_select}_{EXP_NAME}_{timestamp}.joblib'))

    return test_labels, predictions

In [111]:
# SVM hyper-parameters required by train_and_evaluate(train_set, test_set, model, C, gamma, ...).
# They mirror the base estimator configured in `test`.
# TODO: replace with the `best_params_` printed by the grid search.
SVM_C = 1000
SVM_GAMMA = 1
N_RUNS = 15

recall_class_0_list = []
recall_class_1_list = []

for iteration in range(N_RUNS):
    print(f'-------> RUN {iteration}')
    # Only the last run persists the fitted classifier / scaler / PCA.
    test_labels_real, y_pred = train_and_evaluate(
        train_dataset, test_dataset, model, SVM_C, SVM_GAMMA,
        to_save=(iteration == N_RUNS - 1),
    )

    report = classification_report(test_labels_real, y_pred, output_dict=True)
    recall_class_0_list.append(report['0']['recall'])
    recall_class_1_list.append(report['1']['recall'])

mean_recall_class_0 = np.mean(recall_class_0_list)
mean_recall_class_1 = np.mean(recall_class_1_list)
std_recall_class_0 = np.std(recall_class_0_list)
std_recall_class_1 = np.std(recall_class_1_list)

print(f'\n Loss ===== {loss_select}')
print(f"Mean Recall (Class 0): {mean_recall_class_0}")
print(f"Mean Recall (Class 1): {mean_recall_class_1}")
print(f"Std Recall (Class 0): {std_recall_class_0}")
print(f"Std Recall (Class 1): {std_recall_class_1}")

-------> RUN 0
Computing embeddings...


100%|██████████| 109/109 [00:13<00:00,  7.90it/s]
100%|██████████| 47/47 [00:08<00:00,  5.52it/s]


-------> RUN 1
Computing embeddings...


100%|██████████| 109/109 [00:14<00:00,  7.64it/s]
100%|██████████| 47/47 [00:08<00:00,  5.31it/s]


-------> RUN 2
Computing embeddings...


100%|██████████| 109/109 [00:14<00:00,  7.41it/s]
100%|██████████| 47/47 [00:08<00:00,  5.26it/s]


-------> RUN 3
Computing embeddings...


100%|██████████| 109/109 [00:14<00:00,  7.48it/s]
100%|██████████| 47/47 [00:08<00:00,  5.30it/s]


-------> RUN 4
Computing embeddings...


100%|██████████| 109/109 [00:14<00:00,  7.44it/s]
100%|██████████| 47/47 [00:08<00:00,  5.23it/s]


-------> RUN 5
Computing embeddings...


100%|██████████| 109/109 [00:14<00:00,  7.55it/s]
100%|██████████| 47/47 [00:08<00:00,  5.26it/s]


-------> RUN 6
Computing embeddings...


100%|██████████| 109/109 [00:14<00:00,  7.32it/s]
100%|██████████| 47/47 [00:09<00:00,  5.02it/s]


-------> RUN 7
Computing embeddings...


100%|██████████| 109/109 [00:15<00:00,  7.10it/s]
100%|██████████| 47/47 [00:09<00:00,  4.95it/s]


-------> RUN 8
Computing embeddings...


100%|██████████| 109/109 [00:15<00:00,  7.13it/s]
100%|██████████| 47/47 [00:09<00:00,  5.18it/s]


-------> RUN 9
Computing embeddings...


100%|██████████| 109/109 [00:15<00:00,  7.13it/s]
100%|██████████| 47/47 [00:09<00:00,  5.09it/s]


-------> RUN 10
Computing embeddings...


100%|██████████| 109/109 [00:15<00:00,  7.09it/s]
100%|██████████| 47/47 [00:09<00:00,  4.79it/s]


-------> RUN 11
Computing embeddings...


100%|██████████| 109/109 [00:16<00:00,  6.51it/s]
100%|██████████| 47/47 [00:10<00:00,  4.63it/s]


-------> RUN 12
Computing embeddings...


100%|██████████| 109/109 [00:16<00:00,  6.54it/s]
100%|██████████| 47/47 [00:10<00:00,  4.69it/s]


-------> RUN 13
Computing embeddings...


100%|██████████| 109/109 [00:15<00:00,  6.97it/s]
100%|██████████| 47/47 [00:09<00:00,  5.10it/s]


-------> RUN 14
Computing embeddings...


100%|██████████| 109/109 [00:15<00:00,  7.08it/s]
100%|██████████| 47/47 [00:09<00:00,  5.11it/s]



 Loss ===== MultiSimilarity
Mean Recall (Class 0): 0.7613636363636365
Mean Recall (Class 1): 0.6239907727797002
Std Recall (Class 0): 1.1102230246251565e-16
Std Recall (Class 1): 0.0


### Saving feature extractor model

In [112]:
# Persist the embedding network's weights; create the output directory first so
# the save does not fail when it has not been created yet.
MODEL_OUT_DIR.mkdir(parents=True, exist_ok=True)
model_filename = MODEL_OUT_DIR.joinpath(f'model_{loss_select}_{EXP_NAME}_{timestamp}.pth')
torch.save(model.state_dict(), model_filename)

### Load trained model

Triplet

In [10]:
# Load the Triplet feature extractor and its fitted classifier / scaler / PCA.
# Paths are joined with pathlib: backslash separators inside string literals are
# invalid escape sequences and only resolve on Windows.
triplet_dir = Path('./models/v1-dml_2023-11-06')

triplet_model = Net(EMBEDDING_SIZE).to(device)
# map_location lets weights saved on a GPU load on whichever device is active.
triplet_model.load_state_dict(
    torch.load(triplet_dir / 'model_Triplet_v1-dml_2023-11-06.pth', map_location=device)
)

triplet_model.eval()

triplet_clf = load(triplet_dir / 'clf_Triplet_v1-dml_2023-11-06.joblib')
triplet_scaler = load(triplet_dir / 'scaler_Triplet_v1-dml_2023-11-06.joblib')
triplet_pca = load(triplet_dir / 'pca_Triplet_v1-dml_2023-11-06.joblib')

NPairs

In [11]:
# Load the NPairs feature extractor and its fitted classifier / scaler / PCA.
# Paths are joined with pathlib: backslash separators inside string literals are
# invalid escape sequences and only resolve on Windows.
npairs_dir = Path('./models/v1-dml_2023-11-06')

npairs_model = Net(EMBEDDING_SIZE).to(device)
# map_location lets weights saved on a GPU load on whichever device is active.
npairs_model.load_state_dict(
    torch.load(npairs_dir / 'model_NPairs_v1-dml_2023-11-06.pth', map_location=device)
)

npairs_model.eval()

npairs_clf = load(npairs_dir / 'clf_NPairs_v1-dml_2023-11-06.joblib')
npairs_scaler = load(npairs_dir / 'scaler_NPairs_v1-dml_2023-11-06.joblib')
npairs_pca = load(npairs_dir / 'pca_NPairs_v1-dml_2023-11-06.joblib')

Cosface

In [12]:
# Load the CosFace feature extractor and its fitted classifier / scaler / PCA.
# Paths are joined with pathlib: backslash separators inside string literals are
# invalid escape sequences and only resolve on Windows.
cosface_dir = Path('./models/v1-dml_2023-11-06')

cosface_model = Net(EMBEDDING_SIZE).to(device)
# map_location lets weights saved on a GPU load on whichever device is active.
cosface_model.load_state_dict(
    torch.load(cosface_dir / 'model_CosFace_v1-dml_2023-11-06.pth', map_location=device)
)

cosface_model.eval()

cosface_clf = load(cosface_dir / 'clf_CosFace_v1-dml_2023-11-06.joblib')
cosface_scaler = load(cosface_dir / 'scaler_CosFace_v1-dml_2023-11-06.joblib')
cosface_pca = load(cosface_dir / 'pca_CosFace_v1-dml_2023-11-06.joblib')

MultiSimilarity

In [13]:
# Load the MultiSimilarity feature extractor and its fitted classifier / scaler / PCA.
# Paths are joined with pathlib: backslash separators inside string literals are
# invalid escape sequences and only resolve on Windows.
multisim_dir = Path('./models/v1-dml_2023-11-06')

multisim_model = Net(EMBEDDING_SIZE).to(device)
# map_location lets weights saved on a GPU load on whichever device is active.
multisim_model.load_state_dict(
    torch.load(multisim_dir / 'model_MultiSimilarity_v1-dml_2023-11-06.pth', map_location=device)
)

multisim_model.eval()

multisim_clf = load(multisim_dir / 'clf_MultiSimilarity_v1-dml_2023-11-06.joblib')
multisim_scaler = load(multisim_dir / 'scaler_MultiSimilarity_v1-dml_2023-11-06.joblib')
multisim_pca = load(multisim_dir / 'pca_MultiSimilarity_v1-dml_2023-11-06.joblib')

### Ensemble methods

In [163]:
# data_path = Path('./data-9500train/all_patches/')
# x_train, x_test, x_train_loader, x_test_loader, y_true_train, y_true_test = read_data(data_path)
# train_dataset, test_dataset, train_loader, test_loader, train_true_labels, test_true_labels

label format = {'leish': 0, 'no-leish': 1}
train test split proportion = train[7357], test[3154]
leish in training set = 5107
leish in testing set = 2234


#### Majority voting

Triplet + CosFace with Majority Voting

In [17]:
# Parallel lists describing the ensemble members (Triplet and CosFace); position i
# of every list belongs to the same pipeline.
classifiers = [triplet_clf, cosface_clf]
scalers = [triplet_scaler, cosface_scaler]
pcas = [triplet_pca, cosface_pca]
models = [triplet_model, cosface_model]

In [14]:
def get_predictions(x_test, models, classifiers, scalers, pcas):
    """Return one prediction array per (model, classifier, scaler, pca) pipeline.

    For every pipeline the embeddings of ``x_test`` are extracted with its model,
    transformed by its scaler and its PCA, and classified by its classifier.
    The lists are consumed in parallel; the result follows their order.
    """
    def _predict_one(model, clf, scaler, pca):
        # Embed, pre-process and classify with a single pipeline.
        embeddings, _ = get_all_embeddings(x_test, model)
        scaled = scaler.transform(embeddings.cpu().numpy())
        return clf.predict(pca.transform(scaled))

    return [
        _predict_one(model, clf, scaler, pca)
        for model, clf, scaler, pca in zip(models, classifiers, scalers, pcas)
    ]

def combine_predictions(predictions):
    """Combine per-model predictions by majority vote.

    Args:
        predictions: Sequence of equally shaped prediction arrays, one per model.

    Returns:
        Array with the most frequent label per sample, shaped like a single
        model's prediction array. The shape is normalised explicitly because
        ``scipy.stats.mode`` keeps or drops the reduced axis depending on the
        SciPy version. With an even number of voters ties are possible; the
        winner is then whichever single value ``mode`` reports.
    """
    votes = np.asarray(predictions)
    # axis=0 votes across models, i.e. independently for every sample.
    winners = mode(votes, axis=0)[0]
    return np.asarray(winners).reshape(votes.shape[1:])


In [20]:
# get_predictions takes (x_test, models, classifiers, scalers, pcas); `models`
# must be passed explicitly or the call fails with a missing-argument TypeError.
individual_preds = get_predictions(test_dataset, models, classifiers, scalers, pcas)
ensemble_prediction = combine_predictions(individual_preds)
print('---- Ensemble result for Triplet and Cosface models')
print(classification_report(test_true_labels, ensemble_prediction, target_names=['Leish', 'No Leish']))

100%|██████████| 47/47 [00:10<00:00,  4.28it/s]
100%|██████████| 47/47 [00:05<00:00,  8.17it/s]


---- Ensemble result for Triplet and Cosface models
              precision    recall  f1-score   support

       Leish       0.64      0.91      0.75       635
    No Leish       0.90      0.62      0.73       848

    accuracy                           0.74      1483
   macro avg       0.77      0.76      0.74      1483
weighted avg       0.79      0.74      0.74      1483



Triplet + Cosface + NPairs with Majority Voting

In [21]:
# Majority vote over the Triplet, CosFace and NPairs pipelines.
classifiers = [triplet_clf, cosface_clf, npairs_clf]
scalers = [triplet_scaler, cosface_scaler, npairs_scaler]
pcas = [triplet_pca, cosface_pca, npairs_pca]
models = [triplet_model, cosface_model, npairs_model]

# get_predictions takes (x_test, models, classifiers, scalers, pcas); `models`
# must be passed explicitly or the call fails with a missing-argument TypeError.
individual_preds = get_predictions(test_dataset, models, classifiers, scalers, pcas)
ensemble_prediction = combine_predictions(individual_preds)
print('---- Ensemble result for Triplet, Cosface and NPairs models')
print(classification_report(test_true_labels, ensemble_prediction, target_names=['Leish', 'No Leish']))

100%|██████████| 47/47 [00:05<00:00,  8.44it/s]
100%|██████████| 47/47 [00:05<00:00,  8.57it/s]
100%|██████████| 47/47 [00:05<00:00,  8.73it/s]


---- Ensemble result for Triplet, Cosface and NPairs models
              precision    recall  f1-score   support

       Leish       0.65      0.78      0.71       635
    No Leish       0.81      0.68      0.74       848

    accuracy                           0.72      1483
   macro avg       0.73      0.73      0.72      1483
weighted avg       0.74      0.72      0.72      1483



Triplet + Cosface + MultiSimilarity with Majority Voting

In [23]:
# Majority vote over the Triplet, CosFace and MultiSimilarity pipelines.
classifiers = [triplet_clf, cosface_clf, multisim_clf]
scalers = [triplet_scaler, cosface_scaler, multisim_scaler]
pcas = [triplet_pca, cosface_pca, multisim_pca]
models = [triplet_model, cosface_model, multisim_model]

# get_predictions takes (x_test, models, classifiers, scalers, pcas); `models`
# must be passed explicitly or the call fails with a missing-argument TypeError.
individual_preds = get_predictions(test_dataset, models, classifiers, scalers, pcas)
ensemble_prediction = combine_predictions(individual_preds)
print('---- Ensemble result for Triplet, Cosface and MultiSimilarity models')
print(classification_report(test_true_labels, ensemble_prediction, target_names=['Leish', 'No Leish']))

100%|██████████| 47/47 [00:05<00:00,  8.39it/s]
100%|██████████| 47/47 [00:05<00:00,  8.58it/s]
100%|██████████| 47/47 [00:05<00:00,  8.44it/s]


---- Ensemble result for Triplet, Cosface and MultiSimilarity models
              precision    recall  f1-score   support

       Leish       0.65      0.80      0.72       635
    No Leish       0.82      0.68      0.74       848

    accuracy                           0.73      1483
   macro avg       0.73      0.74      0.73      1483
weighted avg       0.74      0.73      0.73      1483



All models with Majority Voting

In [24]:
# Majority vote over all four pipelines. With an even number of voters a 2-2 tie
# is possible; the outcome is then whatever single value the vote function reports.
classifiers = [triplet_clf, cosface_clf, multisim_clf, npairs_clf]
scalers = [triplet_scaler, cosface_scaler, multisim_scaler, npairs_scaler]
pcas = [triplet_pca, cosface_pca, multisim_pca, npairs_pca]
models = [triplet_model, cosface_model, multisim_model, npairs_model]

# get_predictions takes (x_test, models, classifiers, scalers, pcas); `models`
# must be passed explicitly or the call fails with a missing-argument TypeError.
individual_preds = get_predictions(test_dataset, models, classifiers, scalers, pcas)
ensemble_prediction = combine_predictions(individual_preds)
print('---- Ensemble result for Triplet, Cosface, MultiSimilarity and NPairs models')
print(classification_report(test_true_labels, ensemble_prediction, target_names=['Leish', 'No Leish']))

100%|██████████| 47/47 [00:05<00:00,  8.24it/s]
100%|██████████| 47/47 [00:05<00:00,  8.48it/s]
100%|██████████| 47/47 [00:05<00:00,  8.08it/s]
100%|██████████| 47/47 [00:05<00:00,  8.50it/s]


---- Ensemble result for Triplet, Cosface, MultiSimilarity and NPairs models
              precision    recall  f1-score   support

       Leish       0.64      0.86      0.73       635
    No Leish       0.86      0.64      0.73       848

    accuracy                           0.73      1483
   macro avg       0.75      0.75      0.73      1483
weighted avg       0.77      0.73      0.73      1483



#### Weighted Voting

Cosface + Triplet with Weighted Voting

In [28]:
def weighted_voting(predictions, weights):
    """Combine binary predictions through a weighted vote.

    Args:
        predictions: List of per-model prediction sequences (labels 0/1).
        weights: One weight per model, paired with ``predictions`` in order.

    Returns:
        Integer array holding 1 where the weighted sum of the votes is at least
        0.5 and 0 elsewhere.
    """
    votes = np.array(predictions)
    score = np.zeros(votes[0].shape)
    # Accumulate each model's vote scaled by its weight, in list order.
    for model_votes, model_weight in zip(votes, weights):
        score += model_votes * model_weight

    # Class 1 wins when the weighted sum reaches the 0.5 threshold, class 0 otherwise.
    return np.where(score >= 0.5, 1, 0)

In [64]:
# Weighted vote over the CosFace and Triplet pipelines.
classifiers = [cosface_clf, triplet_clf]
scalers = [cosface_scaler, triplet_scaler]
pcas = [cosface_pca, triplet_pca]
models = [cosface_model, triplet_model]
# NOTE(review): with a 0.5 threshold the 0.6-weight model decides every sample by
# itself (0.4 alone never reaches 0.5) — confirm these weights are intended.
weights = [0.4, 0.6]

# get_predictions takes (x_test, models, classifiers, scalers, pcas); `models`
# must be passed explicitly or the call fails with a missing-argument TypeError.
predictions = get_predictions(test_dataset, models, classifiers, scalers, pcas)
ensemble_prediction = weighted_voting(predictions, weights)
print('---- Ensemble result for Cosface and Triplet')
print(classification_report(test_true_labels, ensemble_prediction, target_names=['Leish', 'No Leish']))

100%|██████████| 47/47 [00:10<00:00,  4.50it/s]
100%|██████████| 47/47 [00:09<00:00,  5.11it/s]


---- Ensemble result for Cosface and Triplet
              precision    recall  f1-score   support

       Leish       0.69      0.80      0.74       635
    No Leish       0.83      0.73      0.77       848

    accuracy                           0.76      1483
   macro avg       0.76      0.76      0.76      1483
weighted avg       0.77      0.76      0.76      1483



NPairs + CosFace with Weighted Voting

In [68]:
# Weighted vote over the NPairs and CosFace pipelines.
classifiers = [npairs_clf, cosface_clf]
scalers = [npairs_scaler, cosface_scaler]
pcas = [npairs_pca, cosface_pca]
models = [npairs_model, cosface_model]
# NOTE(review): with a 0.5 threshold the 0.6-weight model decides every sample by
# itself (0.4 alone never reaches 0.5) — confirm these weights are intended.
weights = [0.4, 0.6]

# get_predictions takes (x_test, models, classifiers, scalers, pcas); `models`
# must be passed explicitly or the call fails with a missing-argument TypeError.
predictions = get_predictions(test_dataset, models, classifiers, scalers, pcas)
ensemble_prediction = weighted_voting(predictions, weights)
print('---- Ensemble result for NPairs and Cosface')
print(classification_report(test_true_labels, ensemble_prediction, target_names=['Leish', 'No Leish']))

100%|██████████| 47/47 [00:10<00:00,  4.59it/s]
100%|██████████| 47/47 [00:09<00:00,  5.10it/s]


---- Ensemble result for NPairs and Cosface
              precision    recall  f1-score   support

       Leish       0.63      0.72      0.67       635
    No Leish       0.77      0.68      0.72       848

    accuracy                           0.70      1483
   macro avg       0.70      0.70      0.69      1483
weighted avg       0.71      0.70      0.70      1483



Cosface + Triplet + NPairs with Weighted Voting

In [70]:
# Weighted vote over the NPairs, CosFace and Triplet pipelines.
classifiers = [npairs_clf, cosface_clf, triplet_clf]
scalers = [npairs_scaler, cosface_scaler, triplet_scaler]
pcas = [npairs_pca, cosface_pca, triplet_pca]
models = [npairs_model, cosface_model, triplet_model]
# NOTE(review): with a 0.5 threshold the 0.6-weight model decides every sample by
# itself (the other two sum to 0.4) — confirm these weights are intended.
weights = [0.2, 0.6, 0.2]

# get_predictions takes (x_test, models, classifiers, scalers, pcas); `models`
# must be passed explicitly or the call fails with a missing-argument TypeError.
predictions = get_predictions(test_dataset, models, classifiers, scalers, pcas)
ensemble_prediction = weighted_voting(predictions, weights)
print('---- Ensemble result for NPairs, Cosface, Triplet models')
print(classification_report(test_true_labels, ensemble_prediction, target_names=['Leish', 'No Leish']))

100%|██████████| 47/47 [00:09<00:00,  4.87it/s]
100%|██████████| 47/47 [00:09<00:00,  5.14it/s]
100%|██████████| 47/47 [00:09<00:00,  5.10it/s]


---- Ensemble result for NPairs, Cosface, Triplet models
              precision    recall  f1-score   support

       Leish       0.63      0.72      0.67       635
    No Leish       0.77      0.68      0.72       848

    accuracy                           0.70      1483
   macro avg       0.70      0.70      0.69      1483
weighted avg       0.71      0.70      0.70      1483



Multisim + Cosface + Triplet with Weighted Voting

In [73]:
# Weighted vote over the MultiSimilarity, CosFace and Triplet pipelines.
classifiers = [multisim_clf, cosface_clf, triplet_clf]
scalers = [multisim_scaler, cosface_scaler, triplet_scaler]
pcas = [multisim_pca, cosface_pca, triplet_pca]
models = [multisim_model, cosface_model, triplet_model]
# NOTE(review): with a 0.5 threshold the 0.6-weight model decides every sample by
# itself (the other two sum to 0.4) — confirm these weights are intended.
weights = [0.2, 0.2, 0.6]

# get_predictions takes (x_test, models, classifiers, scalers, pcas); `models`
# must be passed explicitly or the call fails with a missing-argument TypeError.
predictions = get_predictions(test_dataset, models, classifiers, scalers, pcas)
ensemble_prediction = weighted_voting(predictions, weights)
print('---- Ensemble result for MultiSim, Cosface, Triplet models')
print(classification_report(test_true_labels, ensemble_prediction, target_names=['Leish', 'No Leish']))

100%|██████████| 47/47 [00:10<00:00,  4.55it/s]
100%|██████████| 47/47 [00:09<00:00,  5.09it/s]
100%|██████████| 47/47 [00:09<00:00,  5.15it/s]


---- Ensemble result for MultiSim, Cosface, Triplet models
              precision    recall  f1-score   support

       Leish       0.69      0.80      0.74       635
    No Leish       0.83      0.73      0.77       848

    accuracy                           0.76      1483
   macro avg       0.76      0.76      0.76      1483
weighted avg       0.77      0.76      0.76      1483



#### Observations

Cosface + Triplet == Cosface + Triplet + Multisim<br>
NPairs > MultiSim

#### Stacking

In [15]:
# Base pipelines used for stacking (MultiSimilarity, CosFace, Triplet); position i
# of every list belongs to the same pipeline.
classifiers = [multisim_clf, cosface_clf, triplet_clf]
scalers = [multisim_scaler, cosface_scaler, triplet_scaler]
pcas = [multisim_pca, cosface_pca, triplet_pca]
models = [multisim_model, cosface_model, triplet_model]

In [17]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

def stacking_ensemble(models, classifiers, scalers, pcas, x_train, y_train,
                      test_size=0.2, random_state=42, verbose=False):
    """
    Train a meta-classifier that learns how to combine the base classifiers' predictions.

    :param models: List of trained models.
    :param classifiers: List of classifiers matching the models.
    :param scalers: List of scalers matching the models.
    :param pcas: List of PCA objects matching the models.
    :param x_train: Training data.
    :param y_train: True training labels.
    :param test_size: Fraction of the stacked samples held out from the meta-classifier
        fit (default 0.2, the value that used to be hard-coded).
    :param random_state: Seed of the hold-out split (default 42, as before).
    :param verbose: When True, print the meta-classifier's score on the held-out part.
    :return: A fitted LogisticRegression meta-classifier.
    """
    # NOTE(review): the base predictions are computed on x_train; if the base
    # classifiers were fitted on this same data the meta-features are optimistic --
    # consider out-of-fold predictions.
    base_predictions = get_predictions(x_train, models, classifiers, scalers, pcas)
    # One column per base pipeline, one row per sample.
    stacked_features = np.column_stack(base_predictions)

    X_meta_train, X_meta_valid, y_meta_train, y_meta_valid = train_test_split(
        stacked_features, y_train, test_size=test_size, random_state=random_state
    )

    meta_classifier = LogisticRegression()
    meta_classifier.fit(X_meta_train, y_meta_train)

    if verbose:
        # The held-out part is otherwise unused; this is the only place it is evaluated.
        meta_score = meta_classifier.score(X_meta_valid, y_meta_valid)
        print(f"Meta-classifier validation score: {meta_score}")

    return meta_classifier

In [19]:
# Fit the meta-classifier on the base pipelines' predictions for the training set.
meta_clf = stacking_ensemble(models, classifiers, scalers, pcas, train_dataset, train_true_labels)

# Build the same kind of stacked features (one column per base pipeline) for the
# test set and let the meta-classifier produce the final labels.
test_predictions = get_predictions(test_dataset, models, classifiers, scalers, pcas)
stacked_test_features = np.column_stack(test_predictions)
final_predictions = meta_clf.predict(stacked_test_features)
print('---- Ensemble result for Multsim, Cosface and Triplet')
print(classification_report(test_true_labels, final_predictions, target_names=['Leish', 'No-Leish']))

100%|██████████| 109/109 [00:07<00:00, 14.40it/s]
100%|██████████| 109/109 [00:07<00:00, 14.00it/s]
100%|██████████| 109/109 [00:07<00:00, 14.00it/s]
100%|██████████| 47/47 [00:06<00:00,  7.07it/s]
100%|██████████| 47/47 [00:06<00:00,  6.99it/s]
100%|██████████| 47/47 [00:06<00:00,  7.11it/s]


---- Ensemble result for Multsim, Cosface and Triplet
              precision    recall  f1-score   support

       Leish       0.69      0.75      0.72       655
    No-Leish       0.79      0.74      0.76       828

    accuracy                           0.74      1483
   macro avg       0.74      0.74      0.74      1483
weighted avg       0.75      0.74      0.74      1483



In [20]:
# Stacking restricted to the CosFace and Triplet pipelines.
# These assignments overwrite the notebook-level lists used by other ensemble cells.
classifiers = [cosface_clf, triplet_clf]
scalers = [cosface_scaler, triplet_scaler]
pcas = [cosface_pca, triplet_pca]
models = [cosface_model, triplet_model]

# Fit the meta-classifier on training-set predictions of the two base pipelines.
meta_clf = stacking_ensemble(models, classifiers, scalers, pcas, train_dataset, train_true_labels)

# Stack the test-set predictions column-wise and classify them with the meta-classifier.
test_predictions = get_predictions(test_dataset, models, classifiers, scalers, pcas)
stacked_test_features = np.column_stack(test_predictions)
final_predictions = meta_clf.predict(stacked_test_features)
print('---- Ensemble result for Cosface and Triplet')
print(classification_report(test_true_labels, final_predictions, target_names=['Leish', 'No-Leish']))

100%|██████████| 109/109 [00:07<00:00, 14.57it/s]
100%|██████████| 109/109 [00:07<00:00, 14.97it/s]
100%|██████████| 47/47 [00:06<00:00,  7.16it/s]
100%|██████████| 47/47 [00:06<00:00,  7.11it/s]


---- Ensemble result for Cosface and Triplet
              precision    recall  f1-score   support

       Leish       0.69      0.82      0.75       655
    No-Leish       0.83      0.70      0.76       828

    accuracy                           0.76      1483
   macro avg       0.76      0.76      0.76      1483
weighted avg       0.77      0.76      0.76      1483



In [21]:
# Stacking over all four pipelines (Triplet, CosFace, NPairs, MultiSim).
# These assignments overwrite the notebook-level lists used by other ensemble cells.
classifiers = [triplet_clf, cosface_clf, npairs_clf, multisim_clf]
scalers = [triplet_scaler, cosface_scaler, npairs_scaler, multisim_scaler]
pcas = [triplet_pca, cosface_pca, npairs_pca, multisim_pca]
models = [triplet_model, cosface_model, npairs_model, multisim_model]

# Fit the meta-classifier on training-set predictions of the four base pipelines.
meta_clf = stacking_ensemble(models, classifiers, scalers, pcas, train_dataset, train_true_labels)

# Stack the test-set predictions column-wise and classify them with the meta-classifier.
test_predictions = get_predictions(test_dataset, models, classifiers, scalers, pcas)
stacked_test_features = np.column_stack(test_predictions)
final_predictions = meta_clf.predict(stacked_test_features)
print('---- Ensemble result for all models')
print(classification_report(test_true_labels, final_predictions, target_names=['Leish', 'No-Leish']))

100%|██████████| 109/109 [00:07<00:00, 14.21it/s]
100%|██████████| 109/109 [00:07<00:00, 14.73it/s]
100%|██████████| 109/109 [00:07<00:00, 14.44it/s]
100%|██████████| 109/109 [00:07<00:00, 14.91it/s]
100%|██████████| 47/47 [00:06<00:00,  7.20it/s]
100%|██████████| 47/47 [00:06<00:00,  7.06it/s]
100%|██████████| 47/47 [00:06<00:00,  6.91it/s]
100%|██████████| 47/47 [00:06<00:00,  7.12it/s]


---- Ensemble result for all models
              precision    recall  f1-score   support

       Leish       0.72      0.75      0.73       655
    No-Leish       0.79      0.77      0.78       828

    accuracy                           0.76      1483
   macro avg       0.76      0.76      0.76      1483
weighted avg       0.76      0.76      0.76      1483



In [26]:
# Reload the all-models meta-classifier from MODEL_OUT_DIR (the dump call is kept
# commented out so the stored file is not overwritten).
# NOTE(review): both MODEL_OUT_DIR and the file name embed `timestamp`, which is
# today's date, so this path only resolves on the day the file was dumped.
# dump(meta_clf, MODEL_OUT_DIR.joinpath(f'metaclf_AllModels_{EXP_NAME}_{timestamp}.joblib'))
meta_clf_logreg = load(MODEL_OUT_DIR.joinpath(f'metaclf_AllModels_{EXP_NAME}_{timestamp}.joblib'))

### Dataset 2 - Computing the mean/std recall of each model, saving the models and building ensembles

In [29]:
# Load the second dataset. Judging by the unpacked names, read_data returns the
# train/test data, their loaders and the true labels -- confirm against read_data.
# NOTE(review): later cells in this section still evaluate on train_dataset /
# test_dataset rather than on x_train / x_test -- confirm that is intended.
data_path = Path('./data-9500train/all_patches/')
x_train, x_test, x_train_loader, x_test_loader, y_true_train, y_true_test = read_data(data_path)

label format = {'leish': 0, 'no-leish': 1}
train test split proportion = train[7357], test[3154]
leish in training set = 5156
leish in testing set = 2185


In [30]:
def get_loss_func(loss_select):
    """
    Build the metric-learning loss registered under ``loss_select``.

    :param loss_select: One of 'Triplet', 'NPairs', 'CosFace', 'MultiSimilarity'.
    :return: The configured pytorch_metric_learning loss object.
    :raises ValueError: If ``loss_select`` is not one of the names above. An unknown
        name used to return None, which only failed later inside the training loop.
    """
    if loss_select == 'Triplet':
        # Triplet margin loss measured with cosine similarity; the reducer is a
        # ThresholdReducer with low=0.
        distance = distances.CosineSimilarity()
        reducer = reducers.ThresholdReducer(low=0)
        return losses.TripletMarginLoss(margin=0.2, distance=distance, reducer=reducer)

    if loss_select == 'NPairs':
        return losses.NPairsLoss()

    if loss_select == 'CosFace':
        # NOTE(review): the 'CosFace' key builds a CircleLoss, not losses.CosFaceLoss.
        # Kept as is because the saved "CosFace" models were trained with it --
        # confirm whether the name or the loss should change.
        return losses.CircleLoss()

    if loss_select == 'MultiSimilarity':
        return losses.MultiSimilarityLoss(alpha = 2, beta = 50, base=0.5)

    raise ValueError(
        f"Unknown loss_select {loss_select!r}; expected one of "
        "'Triplet', 'NPairs', 'CosFace', 'MultiSimilarity'"
    )

In [46]:
def show_recall_stats(train_dataset, test_dataset, model, loss_select, C, gamma, n_runs=15):
    """
    Run train_and_evaluate repeatedly and print mean/std of the per-class recall.

    :param train_dataset: Training data forwarded to train_and_evaluate.
    :param test_dataset: Test data forwarded to train_and_evaluate.
    :param model: Embedding model forwarded to train_and_evaluate (not modified here).
    :param loss_select: Loss name, used only to label the printed summary.
    :param C: Forwarded to train_and_evaluate.
    :param gamma: Forwarded to train_and_evaluate.
    :param n_runs: Number of repetitions (default 15, the value that used to be hard-coded).
    :raises ValueError: If ``n_runs`` is smaller than 1.
    :return: None; the statistics are printed.

    NOTE(review): ``model`` is reused unchanged across runs, so unless
    train_and_evaluate is itself stochastic every run yields the same recall
    and the reported std is ~0.
    """
    if n_runs < 1:
        raise ValueError(f"n_runs must be >= 1, got {n_runs}")

    recall_class_0_list = []
    recall_class_1_list = []

    for iteration in range(n_runs):
        print(f'-------> RUN {iteration}')
        if iteration == n_runs - 1:
            # Only the final run passes the extra positional True flag
            # (presumably asks train_and_evaluate to keep/save its artifacts -- confirm).
            test_labels_real, y_pred = train_and_evaluate(train_dataset, test_dataset, model, C, gamma, True)
        else:
            test_labels_real, y_pred = train_and_evaluate(train_dataset, test_dataset, model, C, gamma)

        # output_dict=True keys the report by the string form of each label.
        report = classification_report(test_labels_real, y_pred, output_dict=True)
        recall_class_0_list.append(report['0']['recall'])
        recall_class_1_list.append(report['1']['recall'])

    mean_recall_class_0 = np.mean(recall_class_0_list)
    mean_recall_class_1 = np.mean(recall_class_1_list)
    std_recall_class_0 = np.std(recall_class_0_list)
    std_recall_class_1 = np.std(recall_class_1_list)

    print(f'\n Loss ===== {loss_select}')
    print(f"Mean Recall (Class 0): {mean_recall_class_0}")
    print(f"Mean Recall (Class 1): {mean_recall_class_1}")
    print(f"Std Recall (Class 0): {std_recall_class_0}")
    print(f"Std Recall (Class 1): {std_recall_class_1}")

In [53]:
def save_model(loss_select, model_to_save=None):
    """
    Save a network's state_dict as model_<loss>_<EXP_NAME>_<timestamp>.pth in MODEL_OUT_DIR.

    :param loss_select: Loss name embedded in the file name.
    :param model_to_save: Network whose state_dict is written. When omitted, the
        notebook-level ``model`` is used, so existing ``save_model(loss_select)``
        calls behave as before.
    :return: None.
    """
    net = model if model_to_save is None else model_to_save
    # Make sure the output directory exists before writing into it.
    MODEL_OUT_DIR.mkdir(parents=True, exist_ok=True)
    model_filename = MODEL_OUT_DIR.joinpath(f'model_{loss_select}_{EXP_NAME}_{timestamp}.pth')
    torch.save(net.state_dict(), model_filename)

#### Triplet

In [31]:
# Train a fresh embedding network with the Triplet loss.
loss_select = 'Triplet'
loss_func = get_loss_func(loss_select)

# Training batches come from x_train_loader (returned by read_data).

# `model` and `optimizer` are notebook-level names that later cells rely on.
model = Net(EMBEDDING_SIZE).to(device)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# `lss` keeps only the value returned by the last epoch and is not used in this cell.
for epoch in range(1, EPOCHS + 1):
  lss = train(model, loss_func, device, x_train_loader, optimizer, epoch)

Epoch 1 Iteration 0: Loss = 0.2214372605085373
Epoch 2 Iteration 0: Loss = 0.20653247833251953
Epoch 3 Iteration 0: Loss = 0.21268914639949799
Epoch 4 Iteration 0: Loss = 0.21361389756202698
Epoch 5 Iteration 0: Loss = 0.18814897537231445
Epoch 6 Iteration 0: Loss = 0.22645796835422516
Epoch 7 Iteration 0: Loss = 0.18501169979572296
Epoch 8 Iteration 0: Loss = 0.18607017397880554
Epoch 9 Iteration 0: Loss = 0.20863862335681915
Epoch 10 Iteration 0: Loss = 0.3030323088169098


In [33]:
# Evaluate the freshly trained model; test() returns the test embeddings, the true
# test labels and the predicted labels (per the unpacked names).
# NOTE(review): this uses train_dataset/test_dataset, while training consumed
# x_train_loader returned by read_data -- confirm both refer to the same split.
test_embeddings_real, test_labels_real, y_pred = test(train_dataset, test_dataset, model)

Embedding Train


100%|██████████| 109/109 [00:08<00:00, 12.95it/s]


Embedding Test


100%|██████████| 47/47 [00:06<00:00,  7.09it/s]


Computing KNN
dimensoes pca: 7
(3459, 7)
(1483, 7)
Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV 1/5] END .....................C=10, gamma=1;, score=0.857 total time=   0.3s
[CV 2/5] END .....................C=10, gamma=1;, score=0.841 total time=   0.2s
[CV 3/5] END .....................C=10, gamma=1;, score=0.852 total time=   0.2s
[CV 4/5] END .....................C=10, gamma=1;, score=0.873 total time=   0.2s
[CV 5/5] END .....................C=10, gamma=1;, score=0.843 total time=   0.2s
[CV 1/5] END ...................C=10, gamma=0.1;, score=0.867 total time=   0.1s
[CV 2/5] END ...................C=10, gamma=0.1;, score=0.885 total time=   0.1s
[CV 3/5] END ...................C=10, gamma=0.1;, score=0.877 total time=   0.1s
[CV 4/5] END ...................C=10, gamma=0.1;, score=0.906 total time=   0.1s
[CV 5/5] END ...................C=10, gamma=0.1;, score=0.885 total time=   0.1s
[CV 1/5] END ..................C=10, gamma=0.01;, score=0.902 total time=   0.

In [34]:
# Bring the true labels to the CPU before handing them to scikit-learn.
y_test_int = test_labels_real.cpu()
print('LOSS ======== ', loss_select)
print(classification_report(y_test_int, y_pred))

100%|██████████| 47/47 [00:07<00:00,  6.00it/s]

              precision    recall  f1-score   support

           0       0.85      0.94      0.89       655
           1       0.95      0.87      0.90       828

    accuracy                           0.90      1483
   macro avg       0.90      0.90      0.90      1483
weighted avg       0.90      0.90      0.90      1483






In [49]:
# Repeated evaluation of the Triplet model with C=10, gamma=0.001.
show_recall_stats(train_dataset, test_dataset, model, loss_select, 10, 0.001)

-------> RUN 0
Computing embeddings...


100%|██████████| 109/109 [00:08<00:00, 13.21it/s]
100%|██████████| 47/47 [00:06<00:00,  7.11it/s]
  y = column_or_1d(y, warn=True)


-------> RUN 1
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 14.32it/s]
100%|██████████| 47/47 [00:06<00:00,  7.07it/s]
  y = column_or_1d(y, warn=True)


-------> RUN 2
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 14.24it/s]
100%|██████████| 47/47 [00:07<00:00,  6.67it/s]
  y = column_or_1d(y, warn=True)


-------> RUN 3
Computing embeddings...


100%|██████████| 109/109 [00:08<00:00, 13.60it/s]
100%|██████████| 47/47 [00:07<00:00,  6.56it/s]
  y = column_or_1d(y, warn=True)


-------> RUN 4
Computing embeddings...


100%|██████████| 109/109 [00:08<00:00, 13.48it/s]
100%|██████████| 47/47 [00:07<00:00,  6.57it/s]
  y = column_or_1d(y, warn=True)


-------> RUN 5
Computing embeddings...


100%|██████████| 109/109 [00:08<00:00, 13.26it/s]
100%|██████████| 47/47 [00:07<00:00,  6.34it/s]
  y = column_or_1d(y, warn=True)


-------> RUN 6
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 13.95it/s]
100%|██████████| 47/47 [00:06<00:00,  6.77it/s]
  y = column_or_1d(y, warn=True)


-------> RUN 7
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 14.59it/s]
100%|██████████| 47/47 [00:06<00:00,  7.15it/s]
  y = column_or_1d(y, warn=True)


-------> RUN 8
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 15.13it/s]
100%|██████████| 47/47 [00:06<00:00,  6.86it/s]
  y = column_or_1d(y, warn=True)


-------> RUN 9
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 14.48it/s]
100%|██████████| 47/47 [00:06<00:00,  6.99it/s]
  y = column_or_1d(y, warn=True)


-------> RUN 10
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 14.78it/s]
100%|██████████| 47/47 [00:06<00:00,  7.12it/s]
  y = column_or_1d(y, warn=True)


-------> RUN 11
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 14.79it/s]
100%|██████████| 47/47 [00:06<00:00,  7.05it/s]
  y = column_or_1d(y, warn=True)


-------> RUN 12
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 14.86it/s]
100%|██████████| 47/47 [00:06<00:00,  6.99it/s]
  y = column_or_1d(y, warn=True)


-------> RUN 13
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 14.69it/s]
100%|██████████| 47/47 [00:06<00:00,  7.15it/s]
  y = column_or_1d(y, warn=True)


-------> RUN 14
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 14.89it/s]
100%|██████████| 47/47 [00:06<00:00,  6.97it/s]
  y = column_or_1d(y, warn=True)



 Loss ===== Triplet
Mean Recall (Class 0): 0.9374045801526717
Mean Recall (Class 1): 0.8659420289855074
Std Recall (Class 0): 1.1102230246251565e-16
Std Recall (Class 1): 2.220446049250313e-16


In [52]:
# Persist the current notebook-level `model` under the active loss name.
save_model(loss_select)

#### NPairs

In [54]:
# Train a fresh embedding network with the NPairs loss.
loss_select = 'NPairs'
loss_func = get_loss_func(loss_select)

# Training batches come from x_train_loader (returned by read_data).

# Rebinding `model`/`optimizer` replaces the network trained in the previous section.
model = Net(EMBEDDING_SIZE).to(device)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# `lss` keeps only the value returned by the last epoch and is not used in this cell.
for epoch in range(1, EPOCHS + 1):
  lss = train(model, loss_func, device, x_train_loader, optimizer, epoch)

Epoch 1 Iteration 0: Loss = 0.6835356950759888
Epoch 2 Iteration 0: Loss = 0.6941480040550232
Epoch 3 Iteration 0: Loss = 0.6890720129013062
Epoch 4 Iteration 0: Loss = 0.6910037994384766
Epoch 5 Iteration 0: Loss = 0.6983157396316528
Epoch 6 Iteration 0: Loss = 0.6921732425689697
Epoch 7 Iteration 0: Loss = 0.6902471780776978
Epoch 8 Iteration 0: Loss = 0.6924971342086792
Epoch 9 Iteration 0: Loss = 0.6919410228729248
Epoch 10 Iteration 0: Loss = 0.6929746270179749


In [55]:
# Evaluate the NPairs model; test() returns the test embeddings, the true test
# labels and the predicted labels (per the unpacked names).
# NOTE(review): this uses train_dataset/test_dataset, while training consumed
# x_train_loader returned by read_data -- confirm both refer to the same split.
test_embeddings_real, test_labels_real, y_pred = test(train_dataset, test_dataset, model)

Embedding Train


100%|██████████| 109/109 [00:07<00:00, 14.39it/s]


Embedding Test


100%|██████████| 47/47 [00:06<00:00,  6.97it/s]


Computing KNN
dimensoes pca: 3
(3459, 3)
(1483, 3)
Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV 1/5] END .....................C=10, gamma=1;, score=0.611 total time=   0.5s
[CV 2/5] END .....................C=10, gamma=1;, score=0.611 total time=   0.4s
[CV 3/5] END .....................C=10, gamma=1;, score=0.633 total time=   0.4s
[CV 4/5] END .....................C=10, gamma=1;, score=0.614 total time=   0.5s
[CV 5/5] END .....................C=10, gamma=1;, score=0.602 total time=   0.4s
[CV 1/5] END ...................C=10, gamma=0.1;, score=0.648 total time=   0.3s
[CV 2/5] END ...................C=10, gamma=0.1;, score=0.604 total time=   0.2s
[CV 3/5] END ...................C=10, gamma=0.1;, score=0.643 total time=   0.3s
[CV 4/5] END ...................C=10, gamma=0.1;, score=0.645 total time=   0.2s
[CV 5/5] END ...................C=10, gamma=0.1;, score=0.600 total time=   0.3s
[CV 1/5] END ..................C=10, gamma=0.01;, score=0.665 total time=   0.

In [56]:
# Bring the true labels to the CPU before handing them to scikit-learn.
y_test_int = test_labels_real.cpu()
print('LOSS ======== ', loss_select)
print(classification_report(y_test_int, y_pred))

100%|██████████| 47/47 [00:07<00:00,  6.05it/s]

              precision    recall  f1-score   support

           0       0.62      0.75      0.68       655
           1       0.76      0.63      0.69       828

    accuracy                           0.68      1483
   macro avg       0.69      0.69      0.68      1483
weighted avg       0.70      0.68      0.68      1483






In [57]:
# Repeated evaluation of the NPairs model with C=100, gamma=0.01.
show_recall_stats(train_dataset, test_dataset, model, loss_select, 100, 0.01)

-------> RUN 0
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 14.54it/s]
100%|██████████| 47/47 [00:06<00:00,  7.03it/s]


-------> RUN 1
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 14.95it/s]
100%|██████████| 47/47 [00:06<00:00,  7.15it/s]


-------> RUN 2
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 14.75it/s]
100%|██████████| 47/47 [00:06<00:00,  7.06it/s]


-------> RUN 3
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 14.73it/s]
100%|██████████| 47/47 [00:06<00:00,  7.10it/s]


-------> RUN 4
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 14.59it/s]
100%|██████████| 47/47 [00:06<00:00,  7.27it/s]


-------> RUN 5
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 14.57it/s]
100%|██████████| 47/47 [00:06<00:00,  7.24it/s]


-------> RUN 6
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 14.44it/s]
100%|██████████| 47/47 [00:06<00:00,  7.05it/s]


-------> RUN 7
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 14.64it/s]
100%|██████████| 47/47 [00:06<00:00,  7.14it/s]


-------> RUN 8
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 14.66it/s]
100%|██████████| 47/47 [00:06<00:00,  7.19it/s]


-------> RUN 9
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 14.45it/s]
100%|██████████| 47/47 [00:06<00:00,  7.03it/s]


-------> RUN 10
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 15.12it/s]
100%|██████████| 47/47 [00:06<00:00,  6.98it/s]


-------> RUN 11
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 14.71it/s]
100%|██████████| 47/47 [00:06<00:00,  7.07it/s]


-------> RUN 12
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 14.43it/s]
100%|██████████| 47/47 [00:06<00:00,  7.20it/s]


-------> RUN 13
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 14.39it/s]
100%|██████████| 47/47 [00:06<00:00,  7.13it/s]


-------> RUN 14
Computing embeddings...


100%|██████████| 109/109 [00:07<00:00, 14.80it/s]
100%|██████████| 47/47 [00:06<00:00,  7.10it/s]



 Loss ===== NPairs
Mean Recall (Class 0): 0.7526717557251907
Mean Recall (Class 1): 0.6280193236714975
Std Recall (Class 0): 1.1102230246251565e-16
Std Recall (Class 1): 1.1102230246251565e-16


In [58]:
# Persist the current notebook-level `model` under the active loss name.
save_model(loss_select)

#### CosFace

In [59]:
# Train a fresh embedding network under the 'CosFace' key.
# NOTE(review): get_loss_func maps 'CosFace' to losses.CircleLoss() -- confirm the
# intended loss before reporting these results as CosFace.
loss_select = 'CosFace'
loss_func = get_loss_func(loss_select)

# Training batches come from x_train_loader (returned by read_data).

# Rebinding `model`/`optimizer` replaces the network trained in the previous section.
model = Net(EMBEDDING_SIZE).to(device)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# `lss` keeps only the value returned by the last epoch and is not used in this cell.
for epoch in range(1, EPOCHS + 1):
  lss = train(model, loss_func, device, x_train_loader, optimizer, epoch)

Epoch 1 Iteration 0: Loss = 70.23455047607422
Epoch 2 Iteration 0: Loss = 58.894168853759766
Epoch 3 Iteration 0: Loss = 57.77741241455078
Epoch 4 Iteration 0: Loss = 56.41551208496094
Epoch 5 Iteration 0: Loss = 56.42686462402344
Epoch 6 Iteration 0: Loss = 55.74510955810547
Epoch 7 Iteration 0: Loss = 55.024375915527344
Epoch 8 Iteration 0: Loss = 54.48646545410156
Epoch 9 Iteration 0: Loss = 53.84113311767578
Epoch 10 Iteration 0: Loss = 52.557159423828125


In [60]:
# Evaluate the 'CosFace' model; test() returns the test embeddings, the true test
# labels and the predicted labels (per the unpacked names).
# NOTE(review): this uses train_dataset/test_dataset, while training consumed
# x_train_loader returned by read_data -- confirm both refer to the same split.
test_embeddings_real, test_labels_real, y_pred = test(train_dataset, test_dataset, model)

Embedding Train


100%|██████████| 109/109 [00:06<00:00, 17.19it/s]


Embedding Test


100%|██████████| 47/47 [00:05<00:00,  8.57it/s]


Computing KNN
dimensoes pca: 8
(3459, 8)
(1483, 8)
Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV 1/5] END .....................C=10, gamma=1;, score=0.557 total time=   0.3s
[CV 2/5] END .....................C=10, gamma=1;, score=0.528 total time=   0.3s
[CV 3/5] END .....................C=10, gamma=1;, score=0.535 total time=   0.3s
[CV 4/5] END .....................C=10, gamma=1;, score=0.545 total time=   0.3s
[CV 5/5] END .....................C=10, gamma=1;, score=0.539 total time=   0.3s
[CV 1/5] END ...................C=10, gamma=0.1;, score=0.759 total time=   0.2s
[CV 2/5] END ...................C=10, gamma=0.1;, score=0.776 total time=   0.2s
[CV 3/5] END ...................C=10, gamma=0.1;, score=0.784 total time=   0.2s
[CV 4/5] END ...................C=10, gamma=0.1;, score=0.785 total time=   0.2s
[CV 5/5] END ...................C=10, gamma=0.1;, score=0.772 total time=   0.2s
[CV 1/5] END ..................C=10, gamma=0.01;, score=0.825 total time=   0.

In [68]:
# Bring the true labels to the CPU before handing them to scikit-learn.
y_test_int = test_labels_real.cpu()
print('LOSS ======== ', loss_select)
print(classification_report(y_test_int, y_pred))

              precision    recall  f1-score   support

           0       0.85      0.84      0.84       655
           1       0.88      0.88      0.88       828

    accuracy                           0.86      1483
   macro avg       0.86      0.86      0.86      1483
weighted avg       0.86      0.86      0.86      1483



In [69]:
# Repeated evaluation of the 'CosFace' model with C=1000, gamma=0.001.
show_recall_stats(train_dataset, test_dataset, model, loss_select, 1000, 0.001)

-------> RUN 0
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.13it/s]
100%|██████████| 47/47 [00:05<00:00,  8.56it/s]


-------> RUN 1
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 18.05it/s]
100%|██████████| 47/47 [00:05<00:00,  8.52it/s]


-------> RUN 2
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.67it/s]
100%|██████████| 47/47 [00:05<00:00,  8.64it/s]


-------> RUN 3
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.98it/s]
100%|██████████| 47/47 [00:05<00:00,  8.51it/s]


-------> RUN 4
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.67it/s]
100%|██████████| 47/47 [00:05<00:00,  8.63it/s]


-------> RUN 5
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.91it/s]
100%|██████████| 47/47 [00:05<00:00,  8.47it/s]


-------> RUN 6
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.50it/s]
100%|██████████| 47/47 [00:05<00:00,  8.49it/s]


-------> RUN 7
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.40it/s]
100%|██████████| 47/47 [00:05<00:00,  8.56it/s]


-------> RUN 8
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.82it/s]
100%|██████████| 47/47 [00:05<00:00,  8.54it/s]


-------> RUN 9
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.77it/s]
100%|██████████| 47/47 [00:05<00:00,  8.50it/s]


-------> RUN 10
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.45it/s]
100%|██████████| 47/47 [00:05<00:00,  8.50it/s]


-------> RUN 11
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.86it/s]
100%|██████████| 47/47 [00:05<00:00,  8.50it/s]


-------> RUN 12
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.47it/s]
100%|██████████| 47/47 [00:05<00:00,  8.54it/s]


-------> RUN 13
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.40it/s]
100%|██████████| 47/47 [00:05<00:00,  8.52it/s]


-------> RUN 14
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.31it/s]
100%|██████████| 47/47 [00:05<00:00,  8.51it/s]



 Loss ===== CosFace
Mean Recall (Class 0): 0.8427480916030532
Mean Recall (Class 1): 0.8768115942028986
Std Recall (Class 0): 2.220446049250313e-16
Std Recall (Class 1): 0.0


In [70]:
# Persist the current notebook-level `model` under the active loss name.
save_model(loss_select)

#### MultiSimilarity

In [71]:
# Train a fresh embedding network with the MultiSimilarity loss.
loss_select = 'MultiSimilarity'
loss_func = get_loss_func(loss_select)

# Training batches come from x_train_loader (returned by read_data).

# Rebinding `model`/`optimizer` replaces the network trained in the previous section.
model = Net(EMBEDDING_SIZE).to(device)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# `lss` keeps only the value returned by the last epoch and is not used in this cell.
for epoch in range(1, EPOCHS + 1):
  lss = train(model, loss_func, device, x_train_loader, optimizer, epoch)

Epoch 1 Iteration 0: Loss = 2.3075778484344482
Epoch 2 Iteration 0: Loss = 2.232682466506958
Epoch 3 Iteration 0: Loss = 2.2047579288482666
Epoch 4 Iteration 0: Loss = 2.2096004486083984
Epoch 5 Iteration 0: Loss = 2.2154150009155273
Epoch 6 Iteration 0: Loss = 2.2389025688171387
Epoch 7 Iteration 0: Loss = 2.1953234672546387
Epoch 8 Iteration 0: Loss = 2.1957101821899414
Epoch 9 Iteration 0: Loss = 2.2289013862609863
Epoch 10 Iteration 0: Loss = 2.2216076850891113


In [72]:
# Evaluate the MultiSimilarity model; test() returns the test embeddings, the true
# test labels and the predicted labels (per the unpacked names).
# NOTE(review): this uses train_dataset/test_dataset, while training consumed
# x_train_loader returned by read_data -- confirm both refer to the same split.
test_embeddings_real, test_labels_real, y_pred = test(train_dataset, test_dataset, model)

Embedding Train


100%|██████████| 109/109 [00:06<00:00, 17.35it/s]


Embedding Test


100%|██████████| 47/47 [00:05<00:00,  8.51it/s]


Computing KNN
dimensoes pca: 2
(3459, 2)
(1483, 2)
Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV 1/5] END .....................C=10, gamma=1;, score=0.879 total time=   0.1s
[CV 2/5] END .....................C=10, gamma=1;, score=0.844 total time=   0.1s
[CV 3/5] END .....................C=10, gamma=1;, score=0.863 total time=   0.1s
[CV 4/5] END .....................C=10, gamma=1;, score=0.861 total time=   0.1s
[CV 5/5] END .....................C=10, gamma=1;, score=0.854 total time=   0.1s
[CV 1/5] END ...................C=10, gamma=0.1;, score=0.866 total time=   0.1s
[CV 2/5] END ...................C=10, gamma=0.1;, score=0.832 total time=   0.0s
[CV 3/5] END ...................C=10, gamma=0.1;, score=0.856 total time=   0.1s
[CV 4/5] END ...................C=10, gamma=0.1;, score=0.839 total time=   0.0s
[CV 5/5] END ...................C=10, gamma=0.1;, score=0.853 total time=   0.0s
[CV 1/5] END ..................C=10, gamma=0.01;, score=0.845 total time=   0.

In [73]:
# Bring the true labels to the CPU before handing them to scikit-learn.
y_test_int = test_labels_real.cpu()
print('LOSS ======== ', loss_select)
print(classification_report(y_test_int, y_pred))

              precision    recall  f1-score   support

           0       0.86      0.88      0.87       655
           1       0.90      0.89      0.89       828

    accuracy                           0.88      1483
   macro avg       0.88      0.88      0.88      1483
weighted avg       0.88      0.88      0.88      1483



In [74]:
# Repeated evaluation of the MultiSimilarity model with C=1000, gamma=0.1.
show_recall_stats(train_dataset, test_dataset, model, loss_select, 1000, 0.1)

-------> RUN 0
Computing embeddings...


100%|██████████| 109/109 [00:05<00:00, 18.20it/s]
100%|██████████| 47/47 [00:05<00:00,  8.49it/s]


-------> RUN 1
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.99it/s]
100%|██████████| 47/47 [00:05<00:00,  8.73it/s]


-------> RUN 2
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 18.00it/s]
100%|██████████| 47/47 [00:05<00:00,  8.49it/s]


-------> RUN 3
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.88it/s]
100%|██████████| 47/47 [00:05<00:00,  8.56it/s]


-------> RUN 4
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.83it/s]
100%|██████████| 47/47 [00:05<00:00,  8.52it/s]


-------> RUN 5
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 18.03it/s]
100%|██████████| 47/47 [00:05<00:00,  8.46it/s]


-------> RUN 6
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.77it/s]
100%|██████████| 47/47 [00:05<00:00,  8.55it/s]


-------> RUN 7
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.82it/s]
100%|██████████| 47/47 [00:05<00:00,  8.57it/s]


-------> RUN 8
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.93it/s]
100%|██████████| 47/47 [00:05<00:00,  8.47it/s]


-------> RUN 9
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.74it/s]
100%|██████████| 47/47 [00:05<00:00,  8.54it/s]


-------> RUN 10
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.89it/s]
100%|██████████| 47/47 [00:05<00:00,  8.50it/s]


-------> RUN 11
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.90it/s]
100%|██████████| 47/47 [00:05<00:00,  8.49it/s]


-------> RUN 12
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.79it/s]
100%|██████████| 47/47 [00:05<00:00,  8.52it/s]


-------> RUN 13
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.51it/s]
100%|██████████| 47/47 [00:05<00:00,  8.52it/s]


-------> RUN 14
Computing embeddings...


100%|██████████| 109/109 [00:06<00:00, 17.85it/s]
100%|██████████| 47/47 [00:05<00:00,  8.48it/s]



 Loss ===== MultiSimilarity
Mean Recall (Class 0): 0.8778625954198472
Mean Recall (Class 1): 0.885265700483092
Std Recall (Class 0): 1.1102230246251565e-16
Std Recall (Class 1): 2.220446049250313e-16


In [75]:
# Persist the current notebook-level `model` under the active loss name.
save_model(loss_select)

### Ensemble methods (dataset 2)

### Load trained models

In [78]:
# Artifacts saved on 2023-11-07. Paths are joined with pathlib instead of being
# written as backslash-separated literals: sequences such as '\m', '\c', '\s', '\p'
# are invalid string escapes, and backslash separators only resolve on Windows.
artifacts_dir = Path('./models/v1-ensble_2023-11-07')

triplet_model = Net(EMBEDDING_SIZE).to(device)
# map_location lets weights that were saved from a GPU load on a CPU-only machine.
triplet_model.load_state_dict(
    torch.load(artifacts_dir / 'model_Triplet_v1-ensble_2023-11-07.pth', map_location=device)
)

# Switch to inference mode.
triplet_model.eval()

# Fitted classifier, scaler and PCA that belong to the Triplet embeddings.
triplet_clf = load(artifacts_dir / 'clf_Triplet_v1-ensble_2023-11-07.joblib')
triplet_scaler = load(artifacts_dir / 'scaler_Triplet_v1-ensble_2023-11-07.joblib')
triplet_pca = load(artifacts_dir / 'pca_Triplet_v1-ensble_2023-11-07.joblib')

In [79]:
# Artifacts saved on 2023-11-07. Paths are joined with pathlib instead of being
# written as backslash-separated literals: sequences such as '\m', '\c', '\s', '\p'
# are invalid string escapes, and backslash separators only resolve on Windows.
artifacts_dir = Path('./models/v1-ensble_2023-11-07')

cosface_model = Net(EMBEDDING_SIZE).to(device)
# map_location lets weights that were saved from a GPU load on a CPU-only machine.
cosface_model.load_state_dict(
    torch.load(artifacts_dir / 'model_CosFace_v1-ensble_2023-11-07.pth', map_location=device)
)

# Switch to inference mode.
cosface_model.eval()

# Fitted classifier, scaler and PCA that belong to the CosFace embeddings.
cosface_clf = load(artifacts_dir / 'clf_CosFace_v1-ensble_2023-11-07.joblib')
cosface_scaler = load(artifacts_dir / 'scaler_CosFace_v1-ensble_2023-11-07.joblib')
cosface_pca = load(artifacts_dir / 'pca_CosFace_v1-ensble_2023-11-07.joblib')

################################

multisim_model = Net(EMBEDDING_SIZE).to(device)
multisim_model.load_state_dict(
    torch.load(artifacts_dir / 'model_MultiSimilarity_v1-ensble_2023-11-07.pth', map_location=device)
)

# Switch to inference mode.
multisim_model.eval()

# Fitted classifier, scaler and PCA that belong to the MultiSimilarity embeddings.
multisim_clf = load(artifacts_dir / 'clf_MultiSimilarity_v1-ensble_2023-11-07.joblib')
multisim_scaler = load(artifacts_dir / 'scaler_MultiSimilarity_v1-ensble_2023-11-07.joblib')
multisim_pca = load(artifacts_dir / 'pca_MultiSimilarity_v1-ensble_2023-11-07.joblib')

#### Weighted voting

Triplet + MultiSim

In [87]:
# Two-pipeline ensemble: Triplet and MultiSimilarity (CosFace deliberately left out).
# All four lists use the same order.
classifiers = [triplet_clf, multisim_clf]
scalers = [triplet_scaler, multisim_scaler]
pcas = [triplet_pca, multisim_pca]
models = [triplet_model, multisim_model]

In [85]:
# Weighted voting over the pipelines currently held in models/classifiers/scalers/pcas.
# NOTE(review): with only two voters and unequal weights, a vote over hard labels
# always follows the heavier voter (MultiSimilarity here, assuming weights follow
# list order), so this ensemble may simply reproduce that single model -- confirm
# how weighted_voting combines predictions.
weights = [0.3,0.7]
predictions = get_predictions(test_dataset, models, classifiers, scalers, pcas)
ensemble_prediction = weighted_voting(predictions, weights)
print('---- Ensemble result for Triplet and MultiSimilarity')
print(classification_report(test_true_labels, ensemble_prediction, target_names=['Leish', 'No Leish']))

100%|██████████| 47/47 [00:05<00:00,  8.60it/s]
100%|██████████| 47/47 [00:05<00:00,  8.61it/s]

---- Ensemble result for Triplet and MultiSimilarity
              precision    recall  f1-score   support

       Leish       0.86      0.88      0.87       655
    No Leish       0.90      0.89      0.89       828

    accuracy                           0.88      1483
   macro avg       0.88      0.88      0.88      1483
weighted avg       0.88      0.88      0.88      1483






Triplet + Cosface

In [88]:
# Two-pipeline ensemble: Triplet and CosFace (MultiSimilarity deliberately left out).
# All four lists use the same order.
classifiers = [triplet_clf, cosface_clf]
scalers = [triplet_scaler, cosface_scaler]
pcas = [triplet_pca, cosface_pca]
models = [triplet_model, cosface_model]

In [90]:
# Weighted voting over the pipelines currently held in models/classifiers/scalers/pcas.
# NOTE(review): with only two voters and unequal weights, a vote over hard labels
# always follows the heavier voter (CosFace here, assuming weights follow list
# order) -- confirm how weighted_voting combines predictions.
weights = [0.4,0.6]
predictions = get_predictions(test_dataset, models, classifiers, scalers, pcas)
ensemble_prediction = weighted_voting(predictions, weights)
print('---- Ensemble result for Triplet and Cosface')
print(classification_report(test_true_labels, ensemble_prediction, target_names=['Leish', 'No Leish']))

100%|██████████| 47/47 [00:05<00:00,  8.55it/s]
100%|██████████| 47/47 [00:05<00:00,  8.59it/s]

---- Ensemble result for Triplet and Cosface
              precision    recall  f1-score   support

       Leish       0.84      0.84      0.84       655
    No Leish       0.88      0.88      0.88       828

    accuracy                           0.86      1483
   macro avg       0.86      0.86      0.86      1483
weighted avg       0.86      0.86      0.86      1483






#### Majority Voting

Triplet + Cosface + Multisim

In [98]:
# Three-pipeline ensemble (Triplet, CosFace, MultiSimilarity); all four lists use
# the same order. The voting and stacking cells that follow read these lists.
classifiers = [triplet_clf, cosface_clf, multisim_clf]
scalers = [triplet_scaler, cosface_scaler,  multisim_scaler]
pcas = [triplet_pca, cosface_pca, multisim_pca]
models = [triplet_model, cosface_model, multisim_model]

In [100]:
# Collect each pipeline's test predictions and merge them with combine_predictions
# (presumably a per-sample majority vote, as the section title suggests -- confirm).
individual_preds = get_predictions(test_dataset, models, classifiers, scalers, pcas)
ensemble_prediction = combine_predictions(individual_preds)
print('---- Ensemble result for Triplet, Cosface, Multisimilarity')
print(classification_report(test_true_labels, ensemble_prediction, target_names=['Leish', 'No Leish']))

100%|██████████| 47/47 [00:05<00:00,  8.47it/s]
100%|██████████| 47/47 [00:05<00:00,  8.60it/s]
100%|██████████| 47/47 [00:05<00:00,  8.70it/s]


---- Ensemble result for Triplet, Cosface, Multisimilarity
              precision    recall  f1-score   support

       Leish       0.90      0.94      0.92       655
    No Leish       0.95      0.92      0.93       828

    accuracy                           0.93      1483
   macro avg       0.92      0.93      0.92      1483
weighted avg       0.93      0.93      0.93      1483



#### Stacking

Triplet + Cosface + Multisim

In [102]:
# Fit the meta-classifier on training-set predictions of the pipelines currently
# held in models/classifiers/scalers/pcas.
meta_clf = stacking_ensemble(models, classifiers, scalers, pcas, train_dataset, train_true_labels)

# Stack the test-set predictions column-wise (one column per pipeline) and let the
# meta-classifier produce the final labels.
test_predictions = get_predictions(test_dataset, models, classifiers, scalers, pcas)
stacked_test_features = np.column_stack(test_predictions)
final_predictions = meta_clf.predict(stacked_test_features)
print('---- Ensemble result for Triplet, Cosface and MultiSim')
print(classification_report(test_true_labels, final_predictions, target_names=['Leish', 'No-Leish']))

100%|██████████| 109/109 [00:07<00:00, 15.33it/s]
100%|██████████| 109/109 [00:07<00:00, 15.43it/s]
100%|██████████| 109/109 [00:07<00:00, 15.19it/s]
100%|██████████| 47/47 [00:06<00:00,  7.14it/s]
100%|██████████| 47/47 [00:06<00:00,  7.25it/s]
100%|██████████| 47/47 [00:06<00:00,  7.32it/s]


---- Ensemble result for Triplet, Cosface and MultiSim
              precision    recall  f1-score   support

       Leish       0.91      0.90      0.91       655
    No-Leish       0.92      0.93      0.93       828

    accuracy                           0.92      1483
   macro avg       0.92      0.92      0.92      1483
weighted avg       0.92      0.92      0.92      1483

