In [1]:
from torch import optim
from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights
import torch
import random
import numpy as np
import torch.nn as nn
import albumentations as Albu
import pandas as pd
from torch.utils.data.sampler import RandomSampler
from warmup_scheduler import GradualWarmupScheduler
import os
from utils.dataset import PandasDataset
from utils.metrics import model_checkpoint
from utils.train import train_model
from utils.models import EfficientNetApi

In [2]:
seed = 42
shuffle = True
batch_size = 6
num_workers = 4
output_classes = 5
init_lr = 3e-4
warmup_factor = 2
warmup_epochs = 1
n_epochs = 50
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
loss_function = nn.BCEWithLogitsLoss()

torch.manual_seed(seed)
random.seed(seed)
np.random.seed(seed)

ROOT_DIR = '../..'

data_dir = '../../../dataset'
images_dir = os.path.join(data_dir, 'tiles')

Using device: cuda


In [3]:
load_model = efficientnet_b0(
     weights=EfficientNet_B0_Weights.DEFAULT
)
model = EfficientNetApi(model=load_model, output_dimensions=output_classes, dropout_rate=0.6)
model = model.to(device)

In [4]:
print("Using device:", device)
loss_function = nn.BCEWithLogitsLoss()

torch.manual_seed(seed)
random.seed(seed)
np.random.seed(seed)

Using device: cuda


In [5]:
df_train_ = pd.read_csv(f"{ROOT_DIR}/data/train_5fold.csv")
print(f"Registros originais: {len(df_train_)}")

df_entropy = pd.read_csv(f"{ROOT_DIR}/data/entropy.csv")

# Filtrar registros cujo image_id NÃO existe no df_entropy
df_train_ = df_train_[~df_train_['image_id'].isin(df_entropy['image_id'])].reset_index(drop=True)

print(f"Registros no df_entropy: {len(df_entropy)}")
print(f"Registros filtrados (df_train_): {len(df_train_)}")

# Limpar nomes das colunas
df_train_.columns = df_train_.columns.str.strip()

# Criar índices de treino e validação
train_indexes = np.where(df_train_['fold'] != 3)[0]
valid_indexes = np.where(df_train_['fold'] == 3)[0]

# Separar em treino e validação
df_train = df_train_.loc[train_indexes].reset_index(drop=True)
df_val = df_train_.loc[valid_indexes].reset_index(drop=True)

# Carregar teste
df_test = pd.read_csv(f"{ROOT_DIR}/data/test.csv")

print(f"\nTreino: {len(df_train)} registros")
print(f"Validação: {len(df_val)} registros")
print(f"Teste: {len(df_test)} registros")

Registros originais: 9024
Registros no df_entropy: 903
Registros filtrados (df_train_): 8121

Treino: 6511 registros
Validação: 1610 registros
Teste: 1592 registros


#### view data

In [6]:
(df_train.shape, df_val.shape, df_test.shape)

((6511, 5), (1610, 5), (1592, 4))

In [7]:
transforms = Albu.Compose([
    Albu.Transpose(p=0.5),
    Albu.VerticalFlip(p=0.5),
    Albu.HorizontalFlip(p=0.5),
])

In [8]:
df_train.columns = df_train.columns.str.strip()

train_dataset = PandasDataset(images_dir, df_train, transforms=transforms)
valid_dataset = PandasDataset(images_dir, df_val, transforms=None)
test_dataset = PandasDataset(images_dir, df_test, transforms=None)

In [9]:
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, num_workers=num_workers, sampler=RandomSampler(train_dataset)
)
valid_loader = torch.utils.data.DataLoader(
    valid_dataset, batch_size=batch_size, num_workers=num_workers, sampler = RandomSampler(valid_dataset)
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, num_workers=num_workers, sampler = RandomSampler(test_dataset)
)

In [10]:
optimizer = optim.Adam(model.parameters(), lr = init_lr / warmup_factor)
scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, n_epochs - warmup_epochs)
scheduler = GradualWarmupScheduler(optimizer, multiplier = warmup_factor, total_epoch = warmup_epochs, after_scheduler=scheduler_cosine)

In [None]:
train_model(
    model=model,
    epochs=n_epochs,
    optimizer=optimizer,
    scheduler=scheduler,
    train_dataloader=train_loader,
    valid_dataloader=valid_loader,
    checkpoint=model_checkpoint,
    device=device,
    loss_function=loss_function,
    path_to_save_metrics="logs/b0-entropy.txt",
    path_to_save_model="models/b0-entropy.pth",
    patience=5,
)

Epoch 1/50



loss: 0.46335, smooth loss: 0.58516:   4%|▎         | 39/1086 [00:21<09:19,  1.87it/s]

# tests

In [12]:
from utils.metrics import evaluation, format_metrics
model.load_state_dict(
    torch.load(f"models/entropy.pth")
)
response = evaluation(model, test_loader, device)
result = format_metrics(response[0])
print(result)

100%|██████████| 266/266 [01:10<00:00,  3.75it/s]


VAL_ACC      Mean: 59.282 | Std: 1.208 | 95% CI: [57.286, 61.244]
VAL_KAPPA    Mean: 0.826 | Std: 0.012 | 95% CI: [0.806, 0.846]
VAL_F1       Mean: 0.537 | Std: 0.013 | 95% CI: [0.516, 0.558]
VAL_RECALL   Mean: 0.541 | Std: 0.013 | 95% CI: [0.520, 0.562]
VAL_PRECISION Mean: 0.551 | Std: 0.013 | 95% CI: [0.530, 0.572]
