In [1]:
import time
import pyrootutils
import pytorch_lightning as pl
import wandb

# Find the project root via its ".project-root" marker file, add that folder to the
# Python path and make it the current working directory.
# NOTE(review): this presumably has to run before the `src.*` imports below, which
# appear to rely on the updated Python path — keep the statement order.
root = pyrootutils.setup_root(".", indicator=".project-root", pythonpath=True, cwd=True)

from src.data.face_age_datamodule import FaceAgeDataModule
from src.models.face_age_module import FaceAgeModule


def main(seeds=(42, 43, 44)):
    """Train and test the face-age model once per seed.

    For every seed a fresh datamodule, model, checkpoint callback, optional
    Weights & Biases logger and trainer are created. The model is fitted and
    afterwards tested with the best checkpoint (lowest ``val/loss``).

    Args:
        seeds: Iterable of integer seeds; one complete train/test run is
            executed per entry. Defaults to ``(42, 43, 44)``, the three seeds
            this experiment has always used.

    Raises:
        Any exception raised during training/testing is propagated unchanged;
        the active Weights & Biases run (if any) is closed first.
    """
    # ------------------------------------------------------------------
    # Experiment configuration (identical for every seed, so set up once)
    # ------------------------------------------------------------------
    data_dir = root / "data"

    use_wandb = True
    # passed to the datamodule as `normalize_age_by` and to the model as `rescale_age_by`
    age_norm_value = 80
    use_augmented_dataset = False
    loss_fn = "MSELoss"  # "SmoothL1Loss"

    # 1
    net = "SimpleConvNet_100x100"
    img_size = (100, 100)
    imagenet_normalization = False
    exp_name = f"SimpleConvNet+{img_size}+augmented={use_augmented_dataset}+{loss_fn}"

    # 2
    # net = "SimpleConvNet_224x224"
    # img_size = (224, 224)
    # imagenet_normalization = False
    # exp_name = f"SimpleConvNet+{img_size}+augmented={use_augmented_dataset}+{loss_fn}"

    # 3
    # net = "EffNet_224x224"
    # img_size = (224, 224)
    # imagenet_normalization = True
    # exp_name = f"EffNet+{img_size}+augmented={use_augmented_dataset}+{loss_fn}"

    for seed in seeds:
        # set seed for reproducibility
        pl.seed_everything(seed)

        # every run gets its own time-stamped log directory
        log_dir = root / "logs" / time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())

        datamodule = FaceAgeDataModule(
            data_dir=data_dir,
            img_size=img_size,
            imagenet_normalization=imagenet_normalization,
            use_augmented_dataset=use_augmented_dataset,
            normalize_age_by=age_norm_value,
            num_workers=0,
            batch_size=32,
            pin_memory=True,
        )

        model = FaceAgeModule(net=net, rescale_age_by=age_norm_value, loss_fn=loss_fn)

        # this controls how checkpoints are saved
        callbacks = [
            pl.callbacks.ModelCheckpoint(
                monitor="val/loss",
                dirpath=log_dir / "checkpoints",
                save_top_k=1,  # save the best checkpoint
                save_last=True,  # additionally save the last checkpoint
                mode="min",
                save_weights_only=True,
                filename="best-checkpoint",
            )
        ]

        # this configures the optional Weights & Biases logger
        loggers = []
        if use_wandb:
            loggers.append(
                pl.loggers.WandbLogger(
                    project="face-age",
                    save_dir=log_dir,
                    name=exp_name,
                    group=exp_name,  # all seeds of one experiment share a group
                )
            )

        try:
            # trainer setup
            trainer = pl.Trainer(
                accelerator="gpu",
                default_root_dir=log_dir,
                callbacks=callbacks,
                logger=loggers,
                max_epochs=10,
                val_check_interval=0.2,  # frequency of validation epoch
            )

            # train
            trainer.fit(model=model, datamodule=datamodule)

            # test using the best checkpoint tracked by ModelCheckpoint
            trainer.test(model=model, datamodule=datamodule, ckpt_path="best")
        finally:
            # Always close the W&B run, even when training/testing fails, so that an
            # interrupted run does not stay open and get reused by the next run.
            if use_wandb:
                wandb.finish()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Launch the experiment defined above (one train + test run per seed).
main()

Global seed set to 42
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mhobglob[0m. Use [1m`wandb login --relogin`[0m to force relogin


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type                  | Params
-------------------------------------------------------
0 | net          | SimpleConvNet_100x100 | 5.1 M 
1 | criterion    | MSELoss               | 0     
2 | train_mae    | MeanAbsoluteError     | 0     
3 | val_mae      | MeanAbsoluteError     | 0     
4 | test_mae     | MeanAbsoluteError     | 0     
5 | train_loss   | MeanMetric            | 0     
6 | val_loss     | MeanMetric            | 0     
7 | test_loss    | MeanMetric            | 0     
8 | val_mae_best | MinMetric             | 0     
-------------------------------------------------------
5.1 M     Trainable params
0         Non-trainable params
5.1 M     Total params
20.502    Total estimated model params size (MB)


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 4:  85%|████████▍ | 649/767 [00:21<00:03, 30.25it/s, loss=0.0281, v_num=c5gt, val/loss=0.0434, val/mae=12.90, val/mae_best=12.40, train/loss=0.0295, train/mae=10.20]