In [1]:

from BaseVAEs.models.disent.model.ae import EncoderConv64, DecoderConv64, AutoEncoder
from BaseVAEs.models.disent.frameworks.vae.weaklysupervised import AdaVae, AdaCatVae
from BaseVAEs.models.disent.frameworks.vae.unsupervised import BetaVae
from BaseVAEs.models.disent.metrics import metric_dci, metric_mig

In [2]:
import torch
import numpy as np
import time
import matplotlib
matplotlib.use('Agg')
import sys
import os
from torch.utils.tensorboard import SummaryWriter
from rtpt.rtpt import RTPT
from torch.optim import lr_scheduler
from torch.optim import Adam

In [3]:
import BaseVAEs.utils_disent as utils
import BaseVAEs.data as data
from BaseVAEs.args import parse_args_as_dict


In [None]:
# Command-line style options for this run, written as (flag, value) pairs and
# flattened into the argv-like list that parse_args_as_dict expects.
cli_options = (
    ("--save-step", "20"),
    ("--print-step", "1"),
    ("--learning-rate", "0.0001"),
    ("--batch-size", "128"),
    ("--epochs", "200"),
    ("--exp-name", "unsup-betavae-0-ecr"),
    ("--n-groups", "4"),
    ("--n-protos", "6"),
    ("--seed", "0"),
    ("--dataset", "ecr"),
    ("--initials", "YS"),
    ("--lr-scheduler-warmup-steps", "1000"),
    # NOTE: both directories are relative, so they resolve against the
    # kernel's current working directory.
    ("--data-dir", "Data"),
    ("--results-dir", "experiments/BaseVAEs/runs/"),
    ("--n-workers", "0"),
)
sys_argv = [token for option in cli_options for token in option]

# Parse the options into the config mapping used by train() and main().
config = parse_args_as_dict(sys_argv)
print(config)

Device name: cuda:0


In [5]:


def train(model, data_loader, log_samples, optimizer, scheduler, writer, config):
    """Train ``model`` for ``config['epochs']`` epochs with logging and checkpointing.

    Each batch is turned into a single tensor by concatenating ``imgs[0]`` and
    ``imgs[1]`` along dim 0 and is used as both input and reconstruction target.

    Args:
        model: Object providing ``compute_training_loss(batch, batch_idx=...)``, which
            must return a dict containing ``train_loss``, ``recon_loss``,
            ``kl_reg_loss`` and ``elbo``, and ``state_dict()``.
        data_loader: Sized iterable yielding 4-tuples whose first element ``imgs``
            holds two tensors at ``imgs[0]`` and ``imgs[1]`` (presumably the two
            images of a pair -- confirm against the dataset). The remaining three
            elements are not used here.
        log_samples: Samples forwarded unchanged to ``utils.plot_examples``.
        optimizer: Optimizer stepped once per batch; the learning rate of its first
            parameter group is overwritten during warmup.
        scheduler: Learning-rate scheduler stepped once per batch after warmup, or
            ``None`` when no scheduler is used.
        writer: Object providing ``add_scalar(tag, value, global_step=...)``.
        config: Mapping read for the keys ``initials``, ``epochs``,
            ``lr_scheduler_warmup_steps``, ``learning_rate``, ``device``,
            ``lr_scheduler``, ``display_step``, ``print_step``, ``save_step``,
            ``model_dir`` and ``img_dir``.

    Raises:
        ValueError: If ``data_loader`` has length zero.
    """
    # Progress tracker from the rtpt package; advanced once per epoch.
    rtpt = RTPT(name_initials=config['initials'], experiment_name='XIC_PrototypeDL',
                max_iterations=config['epochs'])
    rtpt.start()

    n_warmup = config['lr_scheduler_warmup_steps']
    # Number of optimisation steps completed so far, across all epochs.
    global_step = 0

    for e in range(config['epochs']):
        n_batches = len(data_loader)
        if n_batches == 0:
            # Fail with a clear message instead of a ZeroDivisionError when averaging.
            raise ValueError('data_loader is empty (len(data_loader) == 0); nothing to train on')

        start = time.time()
        # Running sums of the per-batch loss terms; averaged after the epoch.
        loss_dict = {'z_recon_loss': 0, 'loss': 0, 'kld': 0, 'elbo': 0}

        for i, batch in enumerate(data_loader):
            # Linear warmup: ramp the lr of the first param group up to the configured
            # value over the first n_warmup optimisation steps.
            if global_step < n_warmup:
                optimizer.param_groups[0]['lr'] = config['learning_rate'] * (global_step + 1) / n_warmup

            # Only the images are used; the three label entries are ignored.
            imgs, _, _, _ = batch
            imgs0 = imgs[0].to(config['device'])
            imgs1 = imgs[1].to(config['device'])
            imgs = torch.cat((imgs0, imgs1), dim=0)

            # Input and target are the same tensor.
            batch = {'x': (imgs,), 'x_targ': (imgs,)}
            batch_loss_dict = model.compute_training_loss(batch, batch_idx=i)

            loss = batch_loss_dict['train_loss']
            recon_loss = batch_loss_dict['recon_loss']
            kl_reg_loss = batch_loss_dict['kl_reg_loss']
            elbo = batch_loss_dict['elbo']

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Warmup is counted in optimisation steps, so the scheduler must be gated
            # on the step counter. Gating on the epoch index would keep the scheduler
            # from ever running unless epochs exceeded the warmup step count.
            if config['lr_scheduler'] and scheduler is not None and global_step >= n_warmup:
                scheduler.step()

            loss_dict['z_recon_loss'] += recon_loss.item()
            loss_dict['kld'] += kl_reg_loss.item()
            loss_dict['loss'] += loss.item()
            loss_dict['elbo'] += elbo.item()

            global_step += 1

        # Turn the running sums into per-batch averages for this epoch.
        for key in loss_dict.keys():
            loss_dict[key] /= n_batches

        rtpt.step(subtitle=f'loss={loss_dict["loss"]:2.2f}')

        is_last_epoch = e == config['epochs'] - 1

        # Scalar logging, indexed by epoch.
        if (e + 1) % config['display_step'] == 0 or is_last_epoch:
            cur_lr = optimizer.param_groups[0]["lr"]
            writer.add_scalar("lr", cur_lr, global_step=e)
            for key in loss_dict.keys():
                writer.add_scalar(f'train/{key}', loss_dict[key], global_step=e)

        if (e + 1) % config['print_step'] == 0 or is_last_epoch:
            # The first line reports the loss of the LAST batch of the epoch; the
            # second line reports the epoch averages.
            print(f'epoch {e} - loss {loss.item():2.4f} - time/epoch {(time.time() - start):2.2f}')
            loss_summary = ' '.join([f'{key} {value:2.4f}' for key, value in loss_dict.items()])
            print(loss_summary)

        # Checkpoint and plot on the first epoch, every save_step epochs, and the last epoch.
        if (e + 1) % config['save_step'] == 0 or is_last_epoch or e == 0:
            state = {
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'ep': e,
                'config': config
            }
            torch.save(state, os.path.join(config['model_dir'], '%05d.pth' % (e)))
            utils.plot_examples(log_samples, model, writer, config, step=e)
            print(f'SAVED - epoch {e} - imgs @ {config["img_dir"]} - model @ {config["model_dir"]}')
            
    


def main(config):
    """Build the data loader, BetaVae model, optimizer and scheduler, then train.

    Args:
        config: Mapping read here for the keys ``results_dir``, ``n_groups``,
            ``device``, ``learning_rate``, ``lr_scheduler``, ``epochs`` and
            ``lr_scheduler_warmup_steps``; it is also passed on to the data helpers
            and to ``train``.
    """
    # get train data
    _data_loader = data.get_dataloader(config)

    # get test set samples (used by train() for plotting examples)
    test_set = data.get_test_set(_data_loader, config)

    # create tb writer
    writer = SummaryWriter(log_dir=config['results_dir'])

    try:
        # model setup
        # NOTE(review): train() only steps the Adam instance created below; the
        # optimizer factory passed here (lr=1e-3) is presumably used only by the
        # framework's own training path -- confirm.
        _model = BetaVae(make_optimizer_fn=lambda params: Adam(params, lr=1e-3),
                     make_model_fn=lambda: AutoEncoder(
                         encoder=EncoderConv64(x_shape=(3, 64, 64), z_size=config['n_groups'], z_multiplier=2),
                         decoder=DecoderConv64(x_shape=(3, 64, 64), z_size=config['n_groups']),
                     ),
                     cfg=BetaVae.cfg(beta=4))

        _model = _model.to(config['device'])

        # optimizer setup
        optimizer = torch.optim.Adam(_model.parameters(), lr=config['learning_rate'])

        # learning rate scheduler
        scheduler = None
        if config['lr_scheduler']:
            # TODO: try LambdaLR
            # The horizon is expressed in optimisation steps (batches), not epochs.
            num_steps = len(_data_loader) * config['epochs']
            num_steps += config['lr_scheduler_warmup_steps']
            scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_steps, eta_min=2e-5)

        # start training
        train(_model, _data_loader, test_set, optimizer, scheduler, writer, config)
    finally:
        # Flush and release the event file even when setup or training raises.
        writer.close()


In [6]:
# Run the full pipeline (data loading, model setup, training) with the parsed config.
# NOTE(review): the recorded run failed with "Path Data/ECR does not exist"; the data
# directory is relative, so this presumably depends on the kernel's working directory.
main(config=config)

Getting dataloader for ecr
Loading data...
Dataset: ecr
root path: Data/ECR\ECR
root path: c:\Users\yuviu\Desktop\Uni Work\Thesis\XIConceptLearning\experiments\Data\ECR\ECR


AssertionError: Path Data/ECR does not exist

In [19]:
def load_pretrained(model, ckpt):
    """Restore ``model`` in place from the checkpoint mapping ``ckpt`` and return it.

    The weights come from ``ckpt['model']``; ``ckpt['model_misc']`` supplies the
    values stored on the model as ``proto_dict`` and ``softmax_temp``.
    """
    model.load_state_dict(ckpt['model'])

    # Copy the non-weight state over, prototypes first, then the softmax temperature.
    misc = ckpt['model_misc']
    for attr_name, ckpt_key in (('proto_dict', 'prototypes'), ('softmax_temp', 'softmax_temp')):
        setattr(model, attr_name, misc[ckpt_key])

    return model

from ProtoLearning.models.icsn import iCSN



In [5]:
import os

print(os.getcwd())

# Move into the 'experiments' sub-directory of the current working directory.
# Guarded so that re-running this cell does not try to enter
# 'experiments/experiments' and fail with FileNotFoundError.
if os.path.basename(os.getcwd()) != 'experiments':
    os.chdir('experiments')
print(os.getcwd())

c:\Users\yuviu\Desktop\Uni Work\Thesis\XIConceptLearning
c:\Users\yuviu\Desktop\Uni Work\Thesis\XIConceptLearning\experiments
