In [None]:
import os
import random
import time

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data.sampler import RandomSampler, SequentialSampler

from datasets.profile_mode import ProfileModeDataset
from models.curatornet import CuratorNet
from utils.curatornet_sampler import SameProfileSizeBatchSampler
from trainers import Trainer
from utils.data import extract_embedding

In [None]:
# Dataset: which image collection to train on (only the two listed names are supported)
DATASET = "UGallery"
assert DATASET in ("UGallery", "Wikimedia")

In [None]:
# Parameters
# Seed handed to the random number generators; None leaves them unseeded.
RNG_SEED = 0
# Every input file of the selected dataset lives under data/<DATASET>/.
dataset_dir = os.path.join("data", DATASET)
EMBEDDING_PATH = os.path.join(dataset_dir, "embedding-resnet50.npy")
TRAINING_PATH = os.path.join(dataset_dir, "naive-profile-train.csv")
VALIDATION_PATH = os.path.join(dataset_dir, "naive-profile-validation.csv")
# Directory handed to the Trainer as its checkpoint location.
CHECKPOINTS_DIR = "checkpoints"
# Ask for the GPU; it is only used when CUDA is actually available.
USE_GPU = True

# Parameters (training)
# Flat "<component>:<option>" mapping of the hyper-parameters read when the
# batch samplers, DataLoaders, optimizer, LR scheduler and trainer are built.
SETTINGS = {
    # Batch sampler
    "batch_sampler:batch_size": 128,
    "batch_sampler:profile_items_per_batch": 60_000,
    # DataLoader
    # os.cpu_count() may return None when the count cannot be determined;
    # fall back to 0 (load in the main process) so DataLoader gets an int.
    "dataloader:num_workers": os.cpu_count() or 0,
    "dataloader:pin_memory": True,
    # Adam optimizer
    "optimizer:lr": 0.0001,
    "optimizer:weight_decay": 0.0001,
    # ReduceLROnPlateau scheduler
    "scheduler:factor": 0.6,
    "scheduler:patience": 2,
    "scheduler:threshold": 1e-4,
    # Trainer
    "train:max_epochs": 10,
    "train:max_lrs": 10,
    "train:non_blocking": True,
    "train:train_per_valid_times": 1,
}

In [None]:
%%time
# Seed Python's, PyTorch's and NumPy's generators (in that order) unless seeding is disabled
if RNG_SEED is not None:
    print("\nUsing random seed...")
    for seed_fn in (random.seed, torch.manual_seed, np.random.seed):
        seed_fn(RNG_SEED)

# Read the pre-computed embedding file from disk.
# NOTE(review): allow_pickle=True unpickles arbitrary objects, so only load trusted files.
print("\nLoading embedding from file... ({})".format(EMBEDDING_PATH))
embedding = np.load(EMBEDDING_PATH, allow_pickle=True)

# Keep the first value returned by extract_embedding (the features);
# the two remaining return values are discarded here.
print("\nExtracting data into variables...")
embedding, _, _ = extract_embedding(embedding, verbose=True)
print(">> Features shape: {}".format(embedding.shape))

# DataLoaders initialization
print("\nInitialize DataLoaders")
# Options shared by the training and validation pipelines.
sampler_options = {
    "batch_size": SETTINGS["batch_sampler:batch_size"],
    "profile_items_per_batch": SETTINGS["batch_sampler:profile_items_per_batch"],
}
loader_options = {
    "num_workers": SETTINGS["dataloader:num_workers"],
    "pin_memory": SETTINGS["dataloader:pin_memory"],
}
# NOTE(review): the batch samplers below are handed to DataLoader through
# `sampler=` rather than `batch_sampler=` - presumably the dataset is indexed
# with a whole group of indices at once; confirm against ProfileModeDataset.

# Training DataLoader (samples drawn in random order)
train_dataset = ProfileModeDataset(csv_file=TRAINING_PATH)
print(f">> Training dataset: {len(train_dataset)}")
train_sampler = RandomSampler(train_dataset)
train_batch_sampler = SameProfileSizeBatchSampler(sampler=train_sampler, **sampler_options)
train_dataloader = DataLoader(train_dataset, sampler=train_batch_sampler, **loader_options)
print(f">> Training dataloader: {len(train_dataloader)}")

# Validation DataLoader (samples visited in dataset order)
valid_dataset = ProfileModeDataset(csv_file=VALIDATION_PATH)
print(f">> Validation dataset: {len(valid_dataset)}")
valid_sampler = SequentialSampler(valid_dataset)
valid_batch_sampler = SameProfileSizeBatchSampler(sampler=valid_sampler, **sampler_options)
valid_dataloader = DataLoader(valid_dataset, sampler=valid_batch_sampler, **loader_options)
print(f">> Validation dataloader: {len(valid_dataloader)}")
# Model initialization
print("\nInitialize model")
# The GPU is used only when it is both requested (USE_GPU) and available.
cuda_available = torch.cuda.is_available()
device = torch.device("cuda:0" if cuda_available and USE_GPU else "cpu")
if cuda_available != USE_GPU:
    # Either CUDA is missing although requested, or present although not requested.
    print(
        f"\nNotice: Not using GPU - "
        f"Cuda available ({cuda_available}) "
        f"does not match USE_GPU ({USE_GPU})"
    )
# The network receives the embedding as a tensor plus its second-axis size as input_size.
model = CuratorNet(torch.Tensor(embedding), input_size=embedding.shape[1])
model = model.to(device)

# Training setup: Adam optimizer, summed BCE-with-logits loss, plateau-based LR decay
print("\nSetting up training")
optimizer = optim.Adam(
    model.parameters(),
    lr=SETTINGS["optimizer:lr"],
    weight_decay=SETTINGS["optimizer:weight_decay"],
)
criterion = nn.BCEWithLogitsLoss(reduction="sum")
# mode="max": the learning rate is reduced when the monitored value stops increasing
# (presumably a validation accuracy reported by the Trainer - TODO confirm).
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="max",
    factor=SETTINGS["scheduler:factor"],
    patience=SETTINGS["scheduler:patience"],
    threshold=SETTINGS["scheduler:threshold"],
    verbose=True,
)

# Training
print("\nTraining")


In [None]:
%%time
# Training
# Version tag passed to the Trainer: "<ModelClass>_<dataset>".
# (A "_resnet50_" + time.strftime('%Y-%m-%d-%H-%M-%S') suffix was used previously and is disabled.)
version = f"{model.__class__.__name__}_{DATASET.lower()}"
trainer = Trainer(
    model,
    device,
    criterion,
    optimizer,
    scheduler,
    checkpoint_dir=CHECKPOINTS_DIR,
    version=version,
)
dataloaders = {"train": train_dataloader, "validation": valid_dataloader}
best_model, best_acc, best_loss, best_epoch = trainer.run(
    SETTINGS["train:max_epochs"],
    SETTINGS["train:max_lrs"],
    dataloaders,
    train_valid_loops=SETTINGS["train:train_per_valid_times"],
)

# Final result
print(f"\nBest ACC {best_acc} reached at epoch {best_epoch}")
print(best_model)