In [1]:
from torch import optim
from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights
import torch
import random
import numpy as np
import torch.nn as nn
import albumentations as Albu
import pandas as pd
from torch.utils.data.sampler import RandomSampler
from warmup_scheduler import GradualWarmupScheduler
import os
from utils.dataset import PandasDataset
from utils.metrics import model_checkpoint
from utils.train import train_model
from utils.models import EfficientNetApi, EfficientNetApiGem

In [2]:
# --- Paths -------------------------------------------------------------
ROOT_DIR = '../..'
data_dir = '../../../dataset'
images_dir = os.path.join(data_dir, 'tiles')

# --- Reproducibility ---------------------------------------------------
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# --- Data loading ------------------------------------------------------
shuffle = True
batch_size = 8
num_workers = 4

# --- Model / optimisation hyper-parameters -----------------------------
output_classes = 5
init_lr = 3e-4
warmup_factor = 10   # the optimizer starts at init_lr / warmup_factor
warmup_epochs = 1
n_epochs = 50
loss_function = nn.BCEWithLogitsLoss()

# --- Compute device ----------------------------------------------------
# Prefer the GPU when one is visible to PyTorch, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [3]:
# Backbone: torchvision EfficientNet-B0 initialised with its default weights.
load_model = efficientnet_b0(weights=EfficientNet_B0_Weights.DEFAULT)

# Wrap the backbone in the project model with `output_classes` outputs and
# move it to the selected device (`.to` returns the same module instance).
model = EfficientNetApiGem(
    model=load_model,
    output_dimensions=output_classes,
).to(device)

In [4]:
# Re-seed every RNG now that the model has been built, so the random state
# seen by the following cells does not depend on what ran before this point.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

loss_function = nn.BCEWithLogitsLoss()
print(f"Using device: {device}")

Using device: cuda


In [5]:
# Fold of the 5-fold split that is held out for validation; every other fold
# is used for training.
VALID_FOLD = 3

df_train_ = pd.read_csv(f"{ROOT_DIR}/data/train_5fold.csv")
# Column headers may carry stray whitespace; normalise them once, at load time.
df_train_.columns = df_train_.columns.str.strip()

# `np.flatnonzero` yields *positional* row numbers, so they are consumed with
# `.iloc`. (Using `.loc` here would only be correct while the frame keeps its
# default RangeIndex.)
is_valid_fold = (df_train_['fold'] == VALID_FOLD).to_numpy()
train_indexes = np.flatnonzero(~is_valid_fold)
valid_indexes = np.flatnonzero(is_valid_fold)

df_train = df_train_.iloc[train_indexes]
df_val = df_train_.iloc[valid_indexes]

df_test = pd.read_csv(f"{ROOT_DIR}/data/test.csv")
# Same header normalisation as the training CSV, for consistency.
df_test.columns = df_test.columns.str.strip()

#### View the shapes of the train / validation / test splits

In [6]:
# (rows, columns) of the train, validation and test frames, in that order.
tuple(split.shape for split in (df_train, df_val, df_test))

((7219, 5), (1805, 5), (1592, 4))

In [7]:
# Training-time augmentation: three geometric transforms, each applied
# independently with probability 0.5.
transforms = Albu.Compose(
    [
        Albu.Transpose(p=0.5),
        Albu.VerticalFlip(p=0.5),
        Albu.HorizontalFlip(p=0.5),
    ]
)

In [8]:
# The column headers were already stripped on the full training frame when the
# CSV was loaded, and `df_train` inherits them, so no further in-place
# normalisation is needed here.

# Only the training split is augmented; validation and test images are passed
# with `transforms=None`.
train_dataset = PandasDataset(images_dir, df_train, transforms=transforms)
valid_dataset = PandasDataset(images_dir, df_val, transforms=None)
test_dataset = PandasDataset(images_dir, df_test, transforms=None)

In [9]:
# Training batches are drawn in random order.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    num_workers=num_workers,
    sampler=RandomSampler(train_dataset),
)

# Evaluation loaders iterate sequentially: shuffling brings no benefit when
# only computing metrics, makes per-batch outputs harder to trace back to
# rows, and needlessly consumes global RNG state.
valid_loader = torch.utils.data.DataLoader(
    valid_dataset,
    batch_size=batch_size,
    num_workers=num_workers,
    shuffle=False,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    num_workers=num_workers,
    shuffle=False,
)

In [10]:
# Adam starts at a reduced learning rate (init_lr / warmup_factor); the warm-up
# scheduler is configured with multiplier=warmup_factor over `warmup_epochs`
# and then hands over to cosine annealing for the remaining epochs.
optimizer = optim.Adam(
    model.parameters(),
    lr=init_lr / warmup_factor,
)
scheduler_cosine = optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=n_epochs - warmup_epochs,
)
scheduler = GradualWarmupScheduler(
    optimizer,
    multiplier=warmup_factor,
    total_epoch=warmup_epochs,
    after_scheduler=scheduler_cosine,
)

In [None]:
# Launch training. All arguments are passed by keyword, grouped by role.
train_model(
    # model and where it runs
    model=model,
    device=device,
    # data
    train_dataloader=train_loader,
    valid_dataloader=valid_loader,
    # optimisation
    loss_function=loss_function,
    optimizer=optimizer,
    scheduler=scheduler,
    epochs=n_epochs,
    patience=5,
    # checkpointing and logging
    checkpoint=model_checkpoint,
    path_to_save_model="models/efficientnet.pth",
    path_to_save_metrics="logs/with-noise-rgb.txt",
)

Epoch 1/50



loss: 0.36324, smooth loss: 0.38067: 100%|██████████| 903/903 [09:47<00:00,  1.54it/s]
100%|██████████| 226/226 [01:34<00:00,  2.40it/s]


metrics {'val_loss': np.float32(0.33544368), 'val_acc': {'mean': np.float64(42.97844869494438), 'std': np.float64(1.1492846610278245), 'ci_5': np.float64(41.160665452480316), 'ci_95': np.float64(44.93074715137482)}, 'val_kappa': {'mean': np.float64(0.7305816838224273), 'std': np.float64(0.012443114199415871), 'ci_5': np.float64(0.7098967402685475), 'ci_95': np.float64(0.750981791758538)}, 'val_f1': {'mean': np.float64(0.37400432816147805), 'std': np.float64(0.011673420040754409), 'ci_5': np.float64(0.3559548154473305), 'ci_95': np.float64(0.39350510239601133)}, 'val_recall': {'mean': np.float64(0.3769598953127861), 'std': np.float64(0.010994732953855647), 'ci_5': np.float64(0.3595577135682106), 'ci_95': np.float64(0.39545731991529465)}, 'val_precision': {'mean': np.float64(0.46309591618180274), 'std': np.float64(0.014118803369824127), 'ci_5': np.float64(0.4395256504416466), 'ci_95': np.float64(0.4874818563461304)}}
Salvando o melhor modelo... 0.0 -> 0.7305816838224273
Epoch 2/50



loss: 0.47442, smooth loss: 0.31397:  94%|█████████▍| 848/903 [08:55<00:34,  1.59it/s]

# Evaluation on the test set

In [11]:
from utils.metrics import evaluation, format_metrics

# Restore the checkpoint written during training.
# - map_location=device lets a checkpoint saved on a GPU be loaded on a
#   CPU-only machine (and vice versa).
# - weights_only=True restricts unpickling to tensors and plain containers,
#   which is all a state dict needs, and avoids executing arbitrary pickled
#   code from the file.
state_dict = torch.load(
    "models/efficientnet.pth",
    map_location=device,
    weights_only=True,
)
model.load_state_dict(state_dict)

# Evaluate on the held-out test loader; the first element of the returned
# value is what `format_metrics` is given to render.
response = evaluation(model, test_loader, device)
result = format_metrics(response[0])
print(result)

100%|██████████| 199/199 [01:15<00:00,  2.64it/s]


VAL_ACC      Mean: 62.51 | Std: 1.22 | 95% CI: [60.49, 64.45]
VAL_KAPPA    Mean: 0.83 | Std: 0.01 | 95% CI: [0.80, 0.85]
VAL_F1       Mean: 0.57 | Std: 0.01 | 95% CI: [0.55, 0.59]
VAL_RECALL   Mean: 0.57 | Std: 0.01 | 95% CI: [0.55, 0.59]
VAL_PRECISION Mean: 0.57 | Std: 0.01 | 95% CI: [0.55, 0.59]
