In [1]:
import os
import sys
from pathlib import Path

# Select the GPU *before* importing torch. CUDA reads these variables when it
# is first initialised, so setting them ahead of the import guarantees the
# selection is in place no matter when that initialisation happens.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # enumerate GPUs in PCI bus order
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # expose only device 0 to this process

import torch

  from .autonotebook import tqdm as notebook_tqdm


## Step 1: Load the flooding configuration file from the local device or GCS

In [2]:
from ml4floods.models.config_setup import get_default_config
import pkg_resources

# Locate the configuration template bundled with the installed ml4floods package.
# To use your own file instead, point config_fp at it, e.g.:
#   config_fp = 'path/to/worldfloods_template.json'
TEMPLATE_RESOURCE = "models/configurations/worldfloods_template.json"
config_fp = pkg_resources.resource_filename("ml4floods", TEMPLATE_RESOURCE)

# Load the template into the config object used by the rest of the notebook
# and display it as the cell output.
config = get_default_config(config_fp)
config

Loaded Config for experiment:  worldfloods_demo_test
{   'data_params': {   'batch_size': 32,
                       'bucket_id': 'ml4cc_data_lake',
                       'channel_configuration': 'all',
                       'download': {'test': True, 'train': True, 'val': True},
                       'filter_windows': {   'apply': False,
                                             'threshold_clouds': 0.5,
                                             'version': 'v1'},
                       'input_folder': 'S2',
                       'loader_type': 'local',
                       'num_workers': 4,
                       'path_to_splits': 'worldfloods',
                       'target_folder': 'gt',
                       'test_transformation': {'normalize': True},
                       'train_test_split_file': '2_PROD/2_Mart/worldfloods_v1_0/train_test_split.json',
                       'train_transformation': {'normalize': True},
                       'window_size': [256, 256]}

{'experiment_name': 'worldfloods_demo_test',
 'seed': 12,
 'model_params': {'model_folder': 'gs://ml4cc_data_lake/0_DEV/2_Mart/2_MLModelMart',
  'model_version': 'v1',
  'hyperparameters': {'max_tile_size': 256,
   'metric_monitor': 'val_dice_loss',
   'channel_configuration': 'all',
   'label_names': ['land', 'water', 'cloud'],
   'weight_per_class': [1.93445299, 36.60054169, 2.19400729],
   'model_type': 'linear',
   'num_classes': 3,
   'max_epochs': 10,
   'val_every': 1,
   'lr': 0.0001,
   'lr_decay': 0.5,
   'lr_patience': 2,
   'early_stopping_patience': 4,
   'num_channels': 13},
  'train': True,
  'test': True},
 'data_params': {'loader_type': 'local',
  'num_workers': 4,
  'filter_windows': {'version': 'v1', 'threshold_clouds': 0.5, 'apply': False},
  'download': {'train': True, 'val': True, 'test': True},
  'bucket_id': 'ml4cc_data_lake',
  'path_to_splits': 'worldfloods',
  'train_test_split_file': '2_PROD/2_Mart/worldfloods_v1_0/train_test_split.json',
  'input_folder': '

## Step 2: Setup Dataloader

In [3]:
# Switch the experiment to the 4-channel 'bgri' configuration. The same value is
# written to both the data parameters and the model hyperparameters.
config.experiment_name = 'training_flooding_bgri'
for section in (config.data_params, config.model_params.hyperparameters):
    section.channel_configuration = 'bgri'
config.model_params.hyperparameters.num_channels = 4

# Clear the bucket id (local data paths are configured in a later cell).
config.data_params.bucket_id = ""

# Display the updated model parameters as the cell output.
config.model_params

{'model_folder': 'gs://ml4cc_data_lake/0_DEV/2_Mart/2_MLModelMart',
 'model_version': 'v1',
 'hyperparameters': {'max_tile_size': 256,
  'metric_monitor': 'val_dice_loss',
  'channel_configuration': 'bgri',
  'label_names': ['land', 'water', 'cloud'],
  'weight_per_class': [1.93445299, 36.60054169, 2.19400729],
  'model_type': 'linear',
  'num_classes': 3,
  'max_epochs': 10,
  'val_every': 1,
  'lr': 0.0001,
  'lr_decay': 0.5,
  'lr_patience': 2,
  'early_stopping_patience': 4,
  'num_channels': 4},
 'train': True,
 'test': True}

In [4]:
%%time

from ml4floods.models.dataset_setup import get_dataset

# Single place to change when the data lives elsewhere on your machine.
DATA_ROOT = Path("/mnt/d/Flooding")

config.data_params.batch_size = 64  # adjust to the memory available on your GPU
config.data_params.loader_type = 'local'
config.data_params.path_to_splits = str(DATA_ROOT / "worldfloods_v1_0")  # local folder to download the data
config.data_params.train_test_split_file = str(DATA_ROOT / "train_test_split_local.json")
# config.data_params.train_test_split_file = "2_PROD/2_Mart/worldfloods_v1_0/train_test_split.json" # use this to train with all the data

# Download flags for all three splits (train, val and test) are enabled.
config.data_params["download"] = {"train": True, "val": True, "test": True}
config.data_params.num_workers = 4

# If files are not in config.data_params.path_to_splits this will trigger the download of the products.
dataset = get_dataset(config.data_params)

train 194151  tiles
val 1284  tiles
test 11  tiles
CPU times: user 1.34 s, sys: 1.37 s, total: 2.71 s
Wall time: 984 ms


## Verify data loader

#### Verify training data
Data format here: https://github.com/spaceml-org/ml4floods/blob/891fe602880586e7ac821d2f282bf5ec9d4c0795/ml4floods/data/worldfloods/dataset.py#L106

In [5]:
# Build the training loader and report how many batches it yields.
train_dl = dataset.train_dataloader()
train_dl_iter = iter(train_dl)
num_train_batches = len(train_dl_iter)
print(num_train_batches)

# Pull one batch so its contents can be inspected, e.g.:
#   batch_train["image"].shape, batch_train["mask"].shape
batch_train = next(train_dl_iter)

3034


#### Verify validation data

In [6]:
# Build the validation loader and report how many batches it yields.
val_dl = dataset.val_dataloader()
val_dl_iter = iter(val_dl)
num_val_batches = len(val_dl_iter)
print(num_val_batches)

# Pull one batch so its contents can be inspected, e.g.:
#   batch_val["image"].shape, batch_val["mask"].shape
batch_val = next(val_dl_iter)

21


In [7]:
# Build the test loader and report how many batches it yields.
test_dl = dataset.test_dataloader()
test_dl_iter = iter(test_dl)
num_test_batches = len(test_dl_iter)
print(num_test_batches)

# Pull one batch so its contents can be inspected, e.g.:
#   batch_test["image"].shape, batch_test["mask"].shape
batch_test = next(test_dl_iter)

11


### Plot a batch using the ml4floods model
See details here: https://github.com/spaceml-org/ml4floods/blob/891fe602880586e7ac821d2f282bf5ec9d4c0795/ml4floods/data/worldfloods/dataset.py#L106

In [8]:
import importlib
import matplotlib.pyplot as plt
from models import flooding_model
# Re-execute the `models.flooding_model` module so that edits made to it are
# picked up without restarting the kernel.
flooding_model = importlib.reload(flooding_model)

# The plotting code below is currently disabled. Uncomment it to show the
# element at index 2 of the training batch: first the image converted with
# `batch_to_unnorm_rgb`, then its mask converted with `batch_mask_to_rgb`.
# batch_train_rgb = flooding_model.batch_to_unnorm_rgb(batch_train["image"])
# # batch_train_rgb.shape
# plt.imshow(batch_train_rgb[2])
# plt.show()

# batch_train_rgb_mask = flooding_model.batch_mask_to_rgb(batch_train["mask"])
# plt.imshow(batch_train_rgb_mask[2])
# plt.colorbar()
# plt.show()


## Step 3: Setup Model

In [9]:
 # Display the current model parameters before they are overridden for this
 # experiment. `model_folder` is the folder to store the trained model (it will
 # create a subfolder with the name of the experiment).
config.model_params

{'model_folder': 'gs://ml4cc_data_lake/0_DEV/2_Mart/2_MLModelMart',
 'model_version': 'v1',
 'hyperparameters': {'max_tile_size': 256,
  'metric_monitor': 'val_dice_loss',
  'channel_configuration': 'bgri',
  'label_names': ['land', 'water', 'cloud'],
  'weight_per_class': [1.93445299, 36.60054169, 2.19400729],
  'model_type': 'linear',
  'num_classes': 3,
  'max_epochs': 10,
  'val_every': 1,
  'lr': 0.0001,
  'lr_decay': 0.5,
  'lr_patience': 2,
  'early_stopping_patience': 4,
  'num_channels': 4},
 'train': True,
 'test': True}

In [10]:
# Store trained models in a local folder, creating it if it does not exist yet.
config.model_params.model_folder = "train_models"
os.makedirs(config.model_params.model_folder, exist_ok=True)

# Enable the train flag and disable the test flag.
config.model_params.train = True
config.model_params.test = False

# Network architecture and the metric to monitor.
# Currently implemented model types: simplecnn, unet, linear
config.model_params.hyperparameters.model_type = "unet"
config.model_params.hyperparameters.metric_monitor = 'val_iou_loss'  # IoU loss
# config.model_params.hyperparameters.num_channels = 3

In [11]:
import copy

# Reload BEFORE the from-import: importlib.reload() re-executes the module but
# does not rebind names that were already pulled in with `from ... import ...`.
# Importing first would leave the class names below pointing at the stale,
# pre-reload classes.
importlib.reload(flooding_model)
from models.flooding_model import WorldFloodsModel, DistilledTrainingModel, WorldFloodsModel2, WorldFloodsModel1

# Independent copy of the model parameters with the model type switched to
# "unet_simple"; in this cell it is only referenced by the commented-out
# DistilledTrainingModel line.
simple_model_params = copy.deepcopy(config.model_params)
simple_model_params['hyperparameters']['model_type'] = "unet_simple"

# model = DistilledTrainingModel(config.model_params, simple_model_params)
model = WorldFloodsModel2(config.model_params)  # Focal loss and IoU loss
# model = WorldFloodsModel1(config.model_params)  # Focal loss and Dice loss

# Keep a handle on the underlying network and display it as the cell output.
net = model.network
net

num of channels:  4 , num of classes:  3


UNet(
  (dconv_down1): Sequential(
    (0): Conv2d(4, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
  )
  (dconv_down2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
  )
  (dconv_down3): Sequential(
    (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
  )
  (dconv_down4): Sequential(
    (0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
  )
  (maxpool): MaxPool2d(kerne

In [12]:
# Computational complexity of the network
from ptflops import get_model_complexity_info

# Input size handed to ptflops: (num_channels, max_tile_size, max_tile_size).
hparams = config.model_params.hyperparameters
input_resolution = (hparams.num_channels, hparams.max_tile_size, hparams.max_tile_size)

macs, params = get_model_complexity_info(
    net,
    input_resolution,
    as_strings=True,
    print_per_layer_stat=True,
    verbose=True,
)

# Two left-aligned columns: a 30-character label followed by the value.
summary_row = '{:<30}  {:<8}'
print(summary_row.format('Computational complexity: ', macs))
print(summary_row.format('Number of parameters: ', params))

UNet(
  7.78 M, 100.000% Params, 42.51 GMac, 100.000% MACs, 
  (dconv_down1): Sequential(
    39.3 k, 0.505% Params, 2.58 GMac, 6.078% MACs, 
    (0): Conv2d(2.37 k, 0.030% Params, 155.19 MMac, 0.365% MACs, 4, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(0, 0.000% Params, 4.19 MMac, 0.010% MACs, inplace=True)
    (2): Conv2d(36.93 k, 0.474% Params, 2.42 GMac, 5.693% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(0, 0.000% Params, 4.19 MMac, 0.010% MACs, inplace=True)
  )
  (dconv_down2): Sequential(
    221.44 k, 2.845% Params, 3.63 GMac, 8.544% MACs, 
    (0): Conv2d(73.86 k, 0.949% Params, 1.21 GMac, 2.846% MACs, 64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(0, 0.000% Params, 2.1 MMac, 0.005% MACs, inplace=True)
    (2): Conv2d(147.58 k, 1.896% Params, 2.42 GMac, 5.688% MACs, 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(0, 0.000% Params, 2.1 MMac, 0.005% MACs, inplace=Tru

In [13]:
# Flip to True to log this run to Weights & Biases.
setup_weights_and_biases = False

if not setup_weights_and_biases:
    # Logging to Weights & Biases is disabled.
    wandb_logger = None
else:
    import wandb
    from pytorch_lightning.loggers import WandbLogger

    # UNCOMMENT ON FIRST RUN TO LOGIN TO Weights and Biases (only needs to be done once)
    # wandb.login()
    # run = wandb.init()

    # Entity = who is logging the experiment; project = where the run is
    # logged (multiple runs can exist in the same project).
    config['wandb_entity'] = 'ml4floods'
    config['wandb_project'] = 'worldfloods-notebook-demo-project'

    wandb_logger = WandbLogger(
        entity=config.wandb_entity,
        project=config.wandb_project,
        name=config.experiment_name,
    )

In [14]:
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

# Folder that holds everything produced by this experiment.
experiment_path = f"{config.model_params.model_folder}/{config.experiment_name}"

# Follow the metric selected in the config ('val_iou_loss' in this notebook)
# instead of repeating the literal, so both callbacks always agree with it.
metric_monitor = config.model_params.hyperparameters.metric_monitor

# Keep only the best checkpoint, i.e. the one with the lowest monitored value.
checkpoint_callback = ModelCheckpoint(
    dirpath=f"{experiment_path}/checkpoint",
    save_top_k=1,  # an integer count; the previous `True` only worked because True == 1
    verbose=True,
    monitor=metric_monitor,
    mode='min',
)

# Stop training when the monitored value stops decreasing.
# NOTE(review): hyperparameters.early_stopping_patience (4 in the displayed
# config) is not used here; patience is fixed at 10 — confirm this is intended.
early_stop_callback = EarlyStopping(
    monitor=metric_monitor,
    patience=10,
    strict=False,
    verbose=False,
    mode='min'
)

callbacks = [checkpoint_callback, early_stop_callback]

print(f"The trained model will be stored in {experiment_path}")

The trained model will be stored in train_models/training_flooding_bgri


In [15]:
from pytorch_lightning import Trainer

# Value passed to the Trainer as `devices` (with accelerator='gpu').
config.gpus = 1
# config.gpus = None # to not use GPU

# Train for at most 40 epochs.
config.model_params.hyperparameters.max_epochs = 40

hparams = config.model_params.hyperparameters

# Collect the Trainer options in one place, then build the Trainer from them.
trainer_kwargs = dict(
    fast_dev_run=False,
    logger=wandb_logger,
    callbacks=callbacks,
    default_root_dir=f"{config.model_params.model_folder}/{config.experiment_name}",
    accumulate_grad_batches=1,
    gradient_clip_val=0.0,
    auto_lr_find=False,
    benchmark=False,
    max_epochs=hparams.max_epochs,
    check_val_every_n_epoch=hparams.val_every,
    strategy='dp',
    accelerator='gpu',
    devices=config.gpus,
)
trainer = Trainer(**trainer_kwargs)

# Reference on multi-GPU training with PyTorch Lightning:
# https://wandb.ai/wandb/wandb-lightning/reports/Multi-GPU-Training-Using-PyTorch-Lightning--VmlldzozMTk3NTk
# Checkpoints previously used to resume training:
# resume_from_checkpoint='/home/eeaiserver/viplab_projects/satellite-knowledge-distillation/train_models/training_flooding_bgri/checkpoint/epoch=39-step=161799.ckpt'
# resume_from_checkpoint='~/Projects/satellite-knowledge-distillation/train_models/training_flooding_bgri/checkpoint/epoch=30-step=47026.ckpt'

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [None]:
# Start training `model` on `dataset` with the Trainer configured above; the
# checkpoint and early-stopping callbacks passed to that Trainer apply to this run.
trainer.fit(model, dataset)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type | Params
---------------------------------
0 | network | UNet | 7.8 M 
---------------------------------
7.8 M     Trainable params
0         Non-trainable params
7.8 M     Total params
31.134    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0:  99%|█████████████████████████████████████████████▋| 3034/3055 [1:32:46<00:38,  1.83s/it, loss=0.122]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 0:  99%|█████████████████████████████████████████████▋| 3035/3055 [1:32:48<00:36,  1.83s/it, loss=0.122][A
Epoch 0:  99%|█████████████████████████████████████████████▋| 3036/3055 [1:32:49<00:34,  1.83s/it, loss=0.122][A
Epoch 0:  99%|█████████████████████████████████████████████▋| 3037/3055 [1:32:50<00:33,  1.83s/it, loss=0.122][A
Epoch 0:  99%|█████████████████████████████████████████████▋| 3038/3055 [1:32:51<00:31,  1.83s/it, loss=0.122][A
Epoch 0:  99%|█████████████████████████████████████████████▊| 3039/3055 [1:32:52<00:29,  1.83s/it, loss=0.122][A
Epoch 0: 100%|███████████████████████████████████████████

Epoch 0, global step 3034: 'val_iou_loss' reached 0.29351 (best 0.29351), saving model to '/home/viplab/VipLabProjects/satellite-knowledge-distillation/train_models/training_flooding_bgri/checkpoint/epoch=0-step=3034.ckpt' as top True


Epoch 1:  99%|█████████████████████████████████████████████▋| 3034/3055 [1:34:11<00:39,  1.86s/it, loss=0.117]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 1:  99%|█████████████████████████████████████████████▋| 3035/3055 [1:34:13<00:37,  1.86s/it, loss=0.117][A
Epoch 1:  99%|█████████████████████████████████████████████▋| 3036/3055 [1:34:14<00:35,  1.86s/it, loss=0.117][A
Epoch 1:  99%|█████████████████████████████████████████████▋| 3037/3055 [1:34:15<00:33,  1.86s/it, loss=0.117][A
Epoch 1:  99%|█████████████████████████████████████████████▋| 3038/3055 [1:34:16<00:31,  1.86s/it, loss=0.117][A
Epoch 1:  99%|█████████████████████████████████████████████▊| 3039/3055 [1:34:17<00:29,  1.86s/it, loss=0.117][A
Epoch 1: 100%|███████████████████████████████████████████

Epoch 1, global step 6068: 'val_iou_loss' was not in top True


Epoch 2:  99%|█████████████████████████████████████████████▋| 3034/3055 [1:36:06<00:39,  1.90s/it, loss=0.136]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 2:  99%|█████████████████████████████████████████████▋| 3035/3055 [1:36:07<00:38,  1.90s/it, loss=0.136][A
Epoch 2:  99%|█████████████████████████████████████████████▋| 3036/3055 [1:36:09<00:36,  1.90s/it, loss=0.136][A
Epoch 2:  99%|█████████████████████████████████████████████▋| 3037/3055 [1:36:10<00:34,  1.90s/it, loss=0.136][A
Epoch 2:  99%|█████████████████████████████████████████████▋| 3038/3055 [1:36:11<00:32,  1.90s/it, loss=0.136][A
Epoch 2:  99%|█████████████████████████████████████████████▊| 3039/3055 [1:36:12<00:30,  1.90s/it, loss=0.136][A
Epoch 2: 100%|███████████████████████████████████████████

Epoch 2, global step 9102: 'val_iou_loss' was not in top True


Epoch 3:  99%|█████████████████████████████████████████████▋| 3034/3055 [1:32:30<00:38,  1.83s/it, loss=0.118]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 3:  99%|█████████████████████████████████████████████▋| 3035/3055 [1:32:31<00:36,  1.83s/it, loss=0.118][A
Epoch 3:  99%|█████████████████████████████████████████████▋| 3036/3055 [1:32:32<00:34,  1.83s/it, loss=0.118][A
Epoch 3:  99%|█████████████████████████████████████████████▋| 3037/3055 [1:32:33<00:32,  1.83s/it, loss=0.118][A
Epoch 3:  99%|█████████████████████████████████████████████▋| 3038/3055 [1:32:34<00:31,  1.83s/it, loss=0.118][A
Epoch 3:  99%|█████████████████████████████████████████████▊| 3039/3055 [1:32:35<00:29,  1.83s/it, loss=0.118][A
Epoch 3: 100%|███████████████████████████████████████████

Epoch 3, global step 12136: 'val_iou_loss' reached 0.26627 (best 0.26627), saving model to '/home/viplab/VipLabProjects/satellite-knowledge-distillation/train_models/training_flooding_bgri/checkpoint/epoch=3-step=12136.ckpt' as top True


Epoch 4:  99%|█████████████████████████████████████████████▋| 3034/3055 [1:27:31<00:36,  1.73s/it, loss=0.115]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 4:  99%|█████████████████████████████████████████████▋| 3035/3055 [1:27:32<00:34,  1.73s/it, loss=0.115][A
Epoch 4:  99%|█████████████████████████████████████████████▋| 3036/3055 [1:27:33<00:32,  1.73s/it, loss=0.115][A
Epoch 4:  99%|█████████████████████████████████████████████▋| 3037/3055 [1:27:34<00:31,  1.73s/it, loss=0.115][A
Epoch 4:  99%|█████████████████████████████████████████████▋| 3038/3055 [1:27:35<00:29,  1.73s/it, loss=0.115][A
Epoch 4:  99%|█████████████████████████████████████████████▊| 3039/3055 [1:27:36<00:27,  1.73s/it, loss=0.115][A
Epoch 4: 100%|███████████████████████████████████████████

Epoch 4, global step 15170: 'val_iou_loss' was not in top True


Epoch 5:  99%|█████████████████████████████████████████████▋| 3034/3055 [1:26:01<00:35,  1.70s/it, loss=0.115]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 5:  99%|█████████████████████████████████████████████▋| 3035/3055 [1:26:02<00:34,  1.70s/it, loss=0.115][A
Epoch 5:  99%|█████████████████████████████████████████████▋| 3036/3055 [1:26:03<00:32,  1.70s/it, loss=0.115][A
Epoch 5:  99%|█████████████████████████████████████████████▋| 3037/3055 [1:26:04<00:30,  1.70s/it, loss=0.115][A
Epoch 5:  99%|█████████████████████████████████████████████▋| 3038/3055 [1:26:05<00:28,  1.70s/it, loss=0.115][A
Epoch 5:  99%|█████████████████████████████████████████████▊| 3039/3055 [1:26:06<00:27,  1.70s/it, loss=0.115][A
Epoch 5: 100%|███████████████████████████████████████████

Epoch 5, global step 18204: 'val_iou_loss' was not in top True


Epoch 6:  99%|█████████████████████████████████████████████▋| 3034/3055 [1:26:41<00:36,  1.71s/it, loss=0.119]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 6:  99%|█████████████████████████████████████████████▋| 3035/3055 [1:26:42<00:34,  1.71s/it, loss=0.119][A
Epoch 6:  99%|█████████████████████████████████████████████▋| 3036/3055 [1:26:43<00:32,  1.71s/it, loss=0.119][A
Epoch 6:  99%|█████████████████████████████████████████████▋| 3037/3055 [1:26:44<00:30,  1.71s/it, loss=0.119][A
Epoch 6:  99%|█████████████████████████████████████████████▋| 3038/3055 [1:26:45<00:29,  1.71s/it, loss=0.119][A
Epoch 6:  99%|█████████████████████████████████████████████▊| 3039/3055 [1:26:46<00:27,  1.71s/it, loss=0.119][A
Epoch 6: 100%|███████████████████████████████████████████

Epoch 6, global step 21238: 'val_iou_loss' was not in top True


Epoch 00007: reducing learning rate of group 0 to 5.0000e-05.
Epoch 7:  99%|█████████████████████████████████████████████▋| 3034/3055 [1:26:56<00:36,  1.72s/it, loss=0.114]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 7:  99%|█████████████████████████████████████████████▋| 3035/3055 [1:26:57<00:34,  1.72s/it, loss=0.114][A
Epoch 7:  99%|█████████████████████████████████████████████▋| 3036/3055 [1:26:58<00:32,  1.72s/it, loss=0.114][A
Epoch 7:  99%|█████████████████████████████████████████████▋| 3037/3055 [1:26:59<00:30,  1.72s/it, loss=0.114][A
Epoch 7:  99%|█████████████████████████████████████████████▋| 3038/3055 [1:27:00<00:29,  1.72s/it, loss=0.114][A
Epoch 7:  99%|█████████████████████████████████████████████▊| 3039/3055 [1:27:01<00:27,  1.72s/it, loss=0.114

Epoch 7, global step 24272: 'val_iou_loss' was not in top True


Epoch 8:  99%|█████████████████████████████████████████████▋| 3034/3055 [1:26:49<00:36,  1.72s/it, loss=0.121]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 8:  99%|█████████████████████████████████████████████▋| 3035/3055 [1:26:50<00:34,  1.72s/it, loss=0.121][A
Epoch 8:  99%|█████████████████████████████████████████████▋| 3036/3055 [1:26:51<00:32,  1.72s/it, loss=0.121][A
Epoch 8:  99%|█████████████████████████████████████████████▋| 3037/3055 [1:26:52<00:30,  1.72s/it, loss=0.121][A
Epoch 8:  99%|█████████████████████████████████████████████▋| 3038/3055 [1:26:54<00:29,  1.72s/it, loss=0.121][A
Epoch 8:  99%|█████████████████████████████████████████████▊| 3039/3055 [1:26:55<00:27,  1.72s/it, loss=0.121][A
Epoch 8: 100%|███████████████████████████████████████████

Epoch 8, global step 27306: 'val_iou_loss' was not in top True


Epoch 9:  99%|█████████████████████████████████████████████▋| 3034/3055 [1:26:51<00:36,  1.72s/it, loss=0.124]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 9:  99%|█████████████████████████████████████████████▋| 3035/3055 [1:26:52<00:34,  1.72s/it, loss=0.124][A
Epoch 9:  99%|█████████████████████████████████████████████▋| 3036/3055 [1:26:53<00:32,  1.72s/it, loss=0.124][A
Epoch 9:  99%|█████████████████████████████████████████████▋| 3037/3055 [1:26:54<00:30,  1.72s/it, loss=0.124][A
Epoch 9:  99%|█████████████████████████████████████████████▋| 3038/3055 [1:26:55<00:29,  1.72s/it, loss=0.124][A
Epoch 9:  99%|█████████████████████████████████████████████▊| 3039/3055 [1:26:56<00:27,  1.72s/it, loss=0.124][A
Epoch 9: 100%|███████████████████████████████████████████

Epoch 9, global step 30340: 'val_iou_loss' was not in top True


Epoch 00010: reducing learning rate of group 0 to 2.5000e-05.
Epoch 10:  99%|████████████████████████████████████████████▋| 3034/3055 [1:27:02<00:36,  1.72s/it, loss=0.109]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 10:  99%|████████████████████████████████████████████▋| 3035/3055 [1:27:03<00:34,  1.72s/it, loss=0.109][A
Epoch 10:  99%|████████████████████████████████████████████▋| 3036/3055 [1:27:04<00:32,  1.72s/it, loss=0.109][A
Epoch 10:  99%|████████████████████████████████████████████▋| 3037/3055 [1:27:05<00:30,  1.72s/it, loss=0.109][A
Epoch 10:  99%|████████████████████████████████████████████▋| 3038/3055 [1:27:06<00:29,  1.72s/it, loss=0.109][A
Epoch 10:  99%|████████████████████████████████████████████▊| 3039/3055 [1:27:07<00:27,  1.72s/it, loss=0.109

Epoch 10, global step 33374: 'val_iou_loss' reached 0.25519 (best 0.25519), saving model to '/home/viplab/VipLabProjects/satellite-knowledge-distillation/train_models/training_flooding_bgri/checkpoint/epoch=10-step=33374.ckpt' as top True


Epoch 11:  99%|████████████████████████████████████████████▋| 3034/3055 [1:23:19<00:34,  1.65s/it, loss=0.116]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 11:  99%|████████████████████████████████████████████▋| 3035/3055 [1:23:21<00:32,  1.65s/it, loss=0.116][A
Epoch 11:  99%|████████████████████████████████████████████▋| 3036/3055 [1:23:22<00:31,  1.65s/it, loss=0.116][A
Epoch 11:  99%|████████████████████████████████████████████▋| 3037/3055 [1:23:23<00:29,  1.65s/it, loss=0.116][A
Epoch 11:  99%|████████████████████████████████████████████▋| 3038/3055 [1:23:23<00:28,  1.65s/it, loss=0.116][A
Epoch 11:  99%|████████████████████████████████████████████▊| 3039/3055 [1:23:25<00:26,  1.65s/it, loss=0.116][A
Epoch 11: 100%|██████████████████████████████████████████

Epoch 11, global step 36408: 'val_iou_loss' was not in top True


Epoch 12:  99%|████████████████████████████████████████████▋| 3034/3055 [1:24:13<00:34,  1.67s/it, loss=0.113]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 12:  99%|████████████████████████████████████████████▋| 3035/3055 [1:24:15<00:33,  1.67s/it, loss=0.113][A
Epoch 12:  99%|████████████████████████████████████████████▋| 3036/3055 [1:24:16<00:31,  1.67s/it, loss=0.113][A
Epoch 12:  99%|████████████████████████████████████████████▋| 3037/3055 [1:24:17<00:29,  1.67s/it, loss=0.113][A
Epoch 12:  99%|████████████████████████████████████████████▋| 3038/3055 [1:24:18<00:28,  1.66s/it, loss=0.113][A
Epoch 12:  99%|████████████████████████████████████████████▊| 3039/3055 [1:24:19<00:26,  1.66s/it, loss=0.113][A
Epoch 12: 100%|██████████████████████████████████████████

Epoch 12, global step 39442: 'val_iou_loss' reached 0.25513 (best 0.25513), saving model to '/home/viplab/VipLabProjects/satellite-knowledge-distillation/train_models/training_flooding_bgri/checkpoint/epoch=12-step=39442.ckpt' as top True


Epoch 13:  99%|████████████████████████████████████████████▋| 3034/3055 [1:24:52<00:35,  1.68s/it, loss=0.118]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 13:  99%|████████████████████████████████████████████▋| 3035/3055 [1:24:53<00:33,  1.68s/it, loss=0.118][A
Epoch 13:  99%|████████████████████████████████████████████▋| 3036/3055 [1:24:54<00:31,  1.68s/it, loss=0.118][A
Epoch 13:  99%|████████████████████████████████████████████▋| 3037/3055 [1:24:55<00:30,  1.68s/it, loss=0.118][A
Epoch 13:  99%|████████████████████████████████████████████▋| 3038/3055 [1:24:56<00:28,  1.68s/it, loss=0.118][A
Epoch 13:  99%|████████████████████████████████████████████▊| 3039/3055 [1:24:57<00:26,  1.68s/it, loss=0.118][A
Epoch 13: 100%|██████████████████████████████████████████

Epoch 13, global step 42476: 'val_iou_loss' was not in top True


Epoch 14:  99%|█████████████████████████████████████████████▋| 3034/3055 [1:25:27<00:35,  1.69s/it, loss=0.11]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 14:  99%|█████████████████████████████████████████████▋| 3035/3055 [1:25:28<00:33,  1.69s/it, loss=0.11][A
Epoch 14:  99%|█████████████████████████████████████████████▋| 3036/3055 [1:25:29<00:32,  1.69s/it, loss=0.11][A
Epoch 14:  99%|█████████████████████████████████████████████▋| 3037/3055 [1:25:30<00:30,  1.69s/it, loss=0.11][A
Epoch 14:  99%|█████████████████████████████████████████████▋| 3038/3055 [1:25:31<00:28,  1.69s/it, loss=0.11][A
Epoch 14:  99%|█████████████████████████████████████████████▊| 3039/3055 [1:25:32<00:27,  1.69s/it, loss=0.11][A
Epoch 14: 100%|██████████████████████████████████████████

Epoch 14, global step 45510: 'val_iou_loss' reached 0.25217 (best 0.25217), saving model to '/home/viplab/VipLabProjects/satellite-knowledge-distillation/train_models/training_flooding_bgri/checkpoint/epoch=14-step=45510.ckpt' as top True


Epoch 15:  99%|████████████████████████████████████████████▋| 3034/3055 [1:26:45<00:36,  1.72s/it, loss=0.117]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 15:  99%|████████████████████████████████████████████▋| 3035/3055 [1:26:46<00:34,  1.72s/it, loss=0.117][A
Epoch 15:  99%|████████████████████████████████████████████▋| 3036/3055 [1:26:47<00:32,  1.72s/it, loss=0.117][A
Epoch 15:  99%|████████████████████████████████████████████▋| 3037/3055 [1:26:48<00:30,  1.71s/it, loss=0.117][A
Epoch 15:  99%|████████████████████████████████████████████▋| 3038/3055 [1:26:49<00:29,  1.71s/it, loss=0.117][A
Epoch 15:  99%|████████████████████████████████████████████▊| 3039/3055 [1:26:50<00:27,  1.71s/it, loss=0.117][A
Epoch 15: 100%|██████████████████████████████████████████

Epoch 15, global step 48544: 'val_iou_loss' was not in top True


Epoch 16:  99%|████████████████████████████████████████████▋| 3034/3055 [1:36:10<00:39,  1.90s/it, loss=0.113]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 16:  99%|████████████████████████████████████████████▋| 3035/3055 [1:36:11<00:38,  1.90s/it, loss=0.113][A
Epoch 16:  99%|████████████████████████████████████████████▋| 3036/3055 [1:36:12<00:36,  1.90s/it, loss=0.113][A
Epoch 16:  99%|████████████████████████████████████████████▋| 3037/3055 [1:36:13<00:34,  1.90s/it, loss=0.113][A
Epoch 16:  99%|████████████████████████████████████████████▋| 3038/3055 [1:36:14<00:32,  1.90s/it, loss=0.113][A
Epoch 16:  99%|████████████████████████████████████████████▊| 3039/3055 [1:36:15<00:30,  1.90s/it, loss=0.113][A
Epoch 16: 100%|██████████████████████████████████████████

Epoch 16, global step 51578: 'val_iou_loss' was not in top True


Epoch 17:  99%|█████████████████████████████████████████████▋| 3034/3055 [1:39:30<00:41,  1.97s/it, loss=0.11]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 17:  99%|█████████████████████████████████████████████▋| 3035/3055 [1:39:31<00:39,  1.97s/it, loss=0.11][A
Epoch 17:  99%|█████████████████████████████████████████████▋| 3036/3055 [1:39:32<00:37,  1.97s/it, loss=0.11][A
Epoch 17:  99%|█████████████████████████████████████████████▋| 3037/3055 [1:39:33<00:35,  1.97s/it, loss=0.11][A
Epoch 17:  99%|█████████████████████████████████████████████▋| 3038/3055 [1:39:34<00:33,  1.97s/it, loss=0.11][A
Epoch 17:  99%|█████████████████████████████████████████████▊| 3039/3055 [1:39:35<00:31,  1.97s/it, loss=0.11][A
Epoch 17: 100%|██████████████████████████████████████████

Epoch 17, global step 54612: 'val_iou_loss' reached 0.24514 (best 0.24514), saving model to '/home/viplab/VipLabProjects/satellite-knowledge-distillation/train_models/training_flooding_bgri/checkpoint/epoch=17-step=54612.ckpt' as top True


Epoch 18:  99%|████████████████████████████████████████████▋| 3034/3055 [1:36:48<00:40,  1.91s/it, loss=0.109]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 18:  99%|████████████████████████████████████████████▋| 3035/3055 [1:36:49<00:38,  1.91s/it, loss=0.109][A
Epoch 18:  99%|████████████████████████████████████████████▋| 3036/3055 [1:36:50<00:36,  1.91s/it, loss=0.109][A
Epoch 18:  99%|████████████████████████████████████████████▋| 3037/3055 [1:36:51<00:34,  1.91s/it, loss=0.109][A
Epoch 18:  99%|████████████████████████████████████████████▋| 3038/3055 [1:36:52<00:32,  1.91s/it, loss=0.109][A
Epoch 18:  99%|████████████████████████████████████████████▊| 3039/3055 [1:36:54<00:30,  1.91s/it, loss=0.109][A
Epoch 18: 100%|██████████████████████████████████████████

Epoch 18, global step 57646: 'val_iou_loss' was not in top True


Epoch 19:  99%|████████████████████████████████████████████▋| 3034/3055 [1:30:38<00:37,  1.79s/it, loss=0.112]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 19:  99%|████████████████████████████████████████████▋| 3035/3055 [1:30:40<00:35,  1.79s/it, loss=0.112][A
Epoch 19:  99%|████████████████████████████████████████████▋| 3036/3055 [1:30:41<00:34,  1.79s/it, loss=0.112][A
Epoch 19:  99%|████████████████████████████████████████████▋| 3037/3055 [1:30:42<00:32,  1.79s/it, loss=0.112][A
Epoch 19:  99%|████████████████████████████████████████████▋| 3038/3055 [1:30:43<00:30,  1.79s/it, loss=0.112][A
Epoch 19:  99%|████████████████████████████████████████████▊| 3039/3055 [1:30:44<00:28,  1.79s/it, loss=0.112][A
Epoch 19: 100%|██████████████████████████████████████████

Epoch 19, global step 60680: 'val_iou_loss' was not in top True


Epoch 20:  99%|████████████████████████████████████████████▋| 3034/3055 [1:29:30<00:37,  1.77s/it, loss=0.116]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 20:  99%|████████████████████████████████████████████▋| 3035/3055 [1:29:32<00:35,  1.77s/it, loss=0.116][A
Epoch 20:  99%|████████████████████████████████████████████▋| 3036/3055 [1:29:33<00:33,  1.77s/it, loss=0.116][A
Epoch 20:  99%|████████████████████████████████████████████▋| 3037/3055 [1:29:34<00:31,  1.77s/it, loss=0.116][A
Epoch 20:  99%|████████████████████████████████████████████▋| 3038/3055 [1:29:35<00:30,  1.77s/it, loss=0.116][A
Epoch 20:  99%|████████████████████████████████████████████▊| 3039/3055 [1:29:36<00:28,  1.77s/it, loss=0.116][A
Epoch 20: 100%|██████████████████████████████████████████

Epoch 20, global step 63714: 'val_iou_loss' was not in top True


Epoch 00021: reducing learning rate of group 0 to 1.2500e-05.
Epoch 21:  99%|█████████████████████████████████████████████▋| 3034/3055 [1:28:40<00:36,  1.75s/it, loss=0.12]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 21:  99%|█████████████████████████████████████████████▋| 3035/3055 [1:28:42<00:35,  1.75s/it, loss=0.12][A
Epoch 21:  99%|█████████████████████████████████████████████▋| 3036/3055 [1:28:43<00:33,  1.75s/it, loss=0.12][A
Epoch 21:  99%|█████████████████████████████████████████████▋| 3037/3055 [1:28:44<00:31,  1.75s/it, loss=0.12][A
Epoch 21:  99%|█████████████████████████████████████████████▋| 3038/3055 [1:28:45<00:29,  1.75s/it, loss=0.12][A
Epoch 21:  99%|█████████████████████████████████████████████▊| 3039/3055 [1:28:46<00:28,  1.75s/it, loss=0.12

Epoch 21, global step 66748: 'val_iou_loss' reached 0.23799 (best 0.23799), saving model to '/home/viplab/VipLabProjects/satellite-knowledge-distillation/train_models/training_flooding_bgri/checkpoint/epoch=21-step=66748.ckpt' as top True


Epoch 22:  99%|████████████████████████████████████████████▋| 3034/3055 [1:33:32<00:38,  1.85s/it, loss=0.106]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 22:  99%|████████████████████████████████████████████▋| 3035/3055 [1:33:34<00:36,  1.85s/it, loss=0.106][A
Epoch 22:  99%|████████████████████████████████████████████▋| 3036/3055 [1:33:35<00:35,  1.85s/it, loss=0.106][A
Epoch 22:  99%|████████████████████████████████████████████▋| 3037/3055 [1:33:36<00:33,  1.85s/it, loss=0.106][A
Epoch 22:  99%|████████████████████████████████████████████▋| 3038/3055 [1:33:37<00:31,  1.85s/it, loss=0.106][A
Epoch 22:  99%|████████████████████████████████████████████▊| 3039/3055 [1:33:38<00:29,  1.85s/it, loss=0.106][A
Epoch 22: 100%|██████████████████████████████████████████

Epoch 22, global step 69782: 'val_iou_loss' was not in top True


Epoch 23:  99%|████████████████████████████████████████████▋| 3034/3055 [1:33:57<00:39,  1.86s/it, loss=0.117]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 23:  99%|████████████████████████████████████████████▋| 3035/3055 [1:33:58<00:37,  1.86s/it, loss=0.117][A
Epoch 23:  99%|████████████████████████████████████████████▋| 3036/3055 [1:33:59<00:35,  1.86s/it, loss=0.117][A
Epoch 23:  99%|████████████████████████████████████████████▋| 3037/3055 [1:34:01<00:33,  1.86s/it, loss=0.117][A
Epoch 23:  99%|████████████████████████████████████████████▋| 3038/3055 [1:34:02<00:31,  1.86s/it, loss=0.117][A
Epoch 23:  99%|████████████████████████████████████████████▊| 3039/3055 [1:34:03<00:29,  1.86s/it, loss=0.117][A
Epoch 23: 100%|██████████████████████████████████████████

Epoch 23, global step 72816: 'val_iou_loss' was not in top True


Epoch 24:  99%|████████████████████████████████████████████▋| 3034/3055 [1:34:36<00:39,  1.87s/it, loss=0.112]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 24:  99%|████████████████████████████████████████████▋| 3035/3055 [1:34:37<00:37,  1.87s/it, loss=0.112][A
Epoch 24:  99%|████████████████████████████████████████████▋| 3036/3055 [1:34:38<00:35,  1.87s/it, loss=0.112][A
Epoch 24:  99%|████████████████████████████████████████████▋| 3037/3055 [1:34:39<00:33,  1.87s/it, loss=0.112][A
Epoch 24:  99%|████████████████████████████████████████████▋| 3038/3055 [1:34:40<00:31,  1.87s/it, loss=0.112][A
Epoch 24:  99%|████████████████████████████████████████████▊| 3039/3055 [1:34:42<00:29,  1.87s/it, loss=0.112][A
Epoch 24: 100%|██████████████████████████████████████████

Epoch 24, global step 75850: 'val_iou_loss' was not in top True


Epoch 00025: reducing learning rate of group 0 to 6.2500e-06.
Epoch 25:  99%|█████████████████████████████████████████████▋| 3034/3055 [1:34:55<00:39,  1.88s/it, loss=0.11]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 25:  99%|█████████████████████████████████████████████▋| 3035/3055 [1:34:56<00:37,  1.88s/it, loss=0.11][A
Epoch 25:  99%|█████████████████████████████████████████████▋| 3036/3055 [1:34:58<00:35,  1.88s/it, loss=0.11][A
Epoch 25:  99%|█████████████████████████████████████████████▋| 3037/3055 [1:34:59<00:33,  1.88s/it, loss=0.11][A
Epoch 25:  99%|█████████████████████████████████████████████▋| 3038/3055 [1:35:00<00:31,  1.88s/it, loss=0.11][A
Epoch 25:  99%|█████████████████████████████████████████████▊| 3039/3055 [1:35:01<00:30,  1.88s/it, loss=0.11

Epoch 25, global step 78884: 'val_iou_loss' was not in top True


Epoch 26:  99%|█████████████████████████████████████████████▋| 3034/3055 [1:34:02<00:39,  1.86s/it, loss=0.11]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 26:  99%|█████████████████████████████████████████████▋| 3035/3055 [1:34:03<00:37,  1.86s/it, loss=0.11][A
Epoch 26:  99%|█████████████████████████████████████████████▋| 3036/3055 [1:34:05<00:35,  1.86s/it, loss=0.11][A
Epoch 26:  99%|█████████████████████████████████████████████▋| 3037/3055 [1:34:06<00:33,  1.86s/it, loss=0.11][A
Epoch 26:  99%|█████████████████████████████████████████████▋| 3038/3055 [1:34:07<00:31,  1.86s/it, loss=0.11][A
Epoch 26:  99%|█████████████████████████████████████████████▊| 3039/3055 [1:34:08<00:29,  1.86s/it, loss=0.11][A
Epoch 26: 100%|██████████████████████████████████████████

Epoch 26, global step 81918: 'val_iou_loss' reached 0.23740 (best 0.23740), saving model to '/home/viplab/VipLabProjects/satellite-knowledge-distillation/train_models/training_flooding_bgri/checkpoint/epoch=26-step=81918.ckpt' as top True


Epoch 27:  99%|████████████████████████████████████████████▋| 3034/3055 [1:34:16<00:39,  1.86s/it, loss=0.113]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 27:  99%|████████████████████████████████████████████▋| 3035/3055 [1:34:18<00:37,  1.86s/it, loss=0.113][A
Epoch 27:  99%|████████████████████████████████████████████▋| 3036/3055 [1:34:19<00:35,  1.86s/it, loss=0.113][A
Epoch 27:  99%|████████████████████████████████████████████▋| 3037/3055 [1:34:20<00:33,  1.86s/it, loss=0.113][A
Epoch 27:  99%|████████████████████████████████████████████▋| 3038/3055 [1:34:21<00:31,  1.86s/it, loss=0.113][A
Epoch 27:  99%|████████████████████████████████████████████▊| 3039/3055 [1:34:22<00:29,  1.86s/it, loss=0.113][A
Epoch 27: 100%|██████████████████████████████████████████

Epoch 27, global step 84952: 'val_iou_loss' reached 0.23645 (best 0.23645), saving model to '/home/viplab/VipLabProjects/satellite-knowledge-distillation/train_models/training_flooding_bgri/checkpoint/epoch=27-step=84952.ckpt' as top True


Epoch 28:  99%|████████████████████████████████████████████▋| 3034/3055 [1:37:08<00:40,  1.92s/it, loss=0.112]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 28:  99%|████████████████████████████████████████████▋| 3035/3055 [1:37:10<00:38,  1.92s/it, loss=0.112][A
Epoch 28:  99%|████████████████████████████████████████████▋| 3036/3055 [1:37:11<00:36,  1.92s/it, loss=0.112][A
Epoch 28:  99%|████████████████████████████████████████████▋| 3037/3055 [1:37:12<00:34,  1.92s/it, loss=0.112][A
Epoch 28:  99%|████████████████████████████████████████████▋| 3038/3055 [1:37:13<00:32,  1.92s/it, loss=0.112][A
Epoch 28:  99%|████████████████████████████████████████████▊| 3039/3055 [1:37:14<00:30,  1.92s/it, loss=0.112][A
Epoch 28: 100%|██████████████████████████████████████████

Epoch 28, global step 87986: 'val_iou_loss' was not in top True


Epoch 29:  99%|████████████████████████████████████████████▋| 3034/3055 [1:37:05<00:40,  1.92s/it, loss=0.116]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 29:  99%|████████████████████████████████████████████▋| 3035/3055 [1:37:06<00:38,  1.92s/it, loss=0.116][A
Epoch 29:  99%|████████████████████████████████████████████▋| 3036/3055 [1:37:07<00:36,  1.92s/it, loss=0.116][A
Epoch 29:  99%|████████████████████████████████████████████▋| 3037/3055 [1:37:08<00:34,  1.92s/it, loss=0.116][A
Epoch 29:  99%|████████████████████████████████████████████▋| 3038/3055 [1:37:10<00:32,  1.92s/it, loss=0.116][A
Epoch 29:  99%|████████████████████████████████████████████▊| 3039/3055 [1:37:11<00:30,  1.92s/it, loss=0.116][A
Epoch 29: 100%|██████████████████████████████████████████

Epoch 29, global step 91020: 'val_iou_loss' was not in top True


Epoch 30:  99%|████████████████████████████████████████████▋| 3034/3055 [1:34:54<00:39,  1.88s/it, loss=0.109]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 30:  99%|████████████████████████████████████████████▋| 3035/3055 [1:34:55<00:37,  1.88s/it, loss=0.109][A
Epoch 30:  99%|████████████████████████████████████████████▋| 3036/3055 [1:34:57<00:35,  1.88s/it, loss=0.109][A
Epoch 30:  99%|████████████████████████████████████████████▋| 3037/3055 [1:34:58<00:33,  1.88s/it, loss=0.109][A
Epoch 30:  99%|████████████████████████████████████████████▋| 3038/3055 [1:34:59<00:31,  1.88s/it, loss=0.109][A
Epoch 30:  99%|████████████████████████████████████████████▊| 3039/3055 [1:35:00<00:30,  1.88s/it, loss=0.109][A
Epoch 30: 100%|██████████████████████████████████████████

Epoch 30, global step 94054: 'val_iou_loss' reached 0.23610 (best 0.23610), saving model to '/home/viplab/VipLabProjects/satellite-knowledge-distillation/train_models/training_flooding_bgri/checkpoint/epoch=30-step=94054.ckpt' as top True


Epoch 31:  99%|████████████████████████████████████████████▋| 3034/3055 [1:34:38<00:39,  1.87s/it, loss=0.109]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 31:  99%|████████████████████████████████████████████▋| 3035/3055 [1:34:39<00:37,  1.87s/it, loss=0.109][A
Epoch 31:  99%|████████████████████████████████████████████▋| 3036/3055 [1:34:40<00:35,  1.87s/it, loss=0.109][A
Epoch 31:  99%|████████████████████████████████████████████▋| 3037/3055 [1:34:41<00:33,  1.87s/it, loss=0.109][A
Epoch 31:  99%|████████████████████████████████████████████▋| 3038/3055 [1:34:42<00:31,  1.87s/it, loss=0.109][A
Epoch 31:  99%|████████████████████████████████████████████▊| 3039/3055 [1:34:43<00:29,  1.87s/it, loss=0.109][A
Epoch 31: 100%|██████████████████████████████████████████

Epoch 31, global step 97088: 'val_iou_loss' was not in top True


Epoch 32:  99%|████████████████████████████████████████████▋| 3034/3055 [1:37:07<00:40,  1.92s/it, loss=0.115]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                      | 0/21 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                         | 0/21 [00:00<?, ?it/s][A
Epoch 32:  99%|████████████████████████████████████████████▋| 3035/3055 [1:37:09<00:38,  1.92s/it, loss=0.115][A
Epoch 32:  99%|████████████████████████████████████████████▋| 3036/3055 [1:37:10<00:36,  1.92s/it, loss=0.115][A
Epoch 32:  99%|████████████████████████████████████████████▋| 3037/3055 [1:37:11<00:34,  1.92s/it, loss=0.115][A
Epoch 32:  99%|████████████████████████████████████████████▋| 3038/3055 [1:37:12<00:32,  1.92s/it, loss=0.115][A
Epoch 32:  99%|████████████████████████████████████████████▊| 3039/3055 [1:37:14<00:30,  1.92s/it, loss=0.115][A
Epoch 32: 100%|██████████████████████████████████████████

Epoch 32, global step 100122: 'val_iou_loss' was not in top True


Epoch 33:  81%|█████████████████████████████████████         | 2463/3055 [1:17:58<18:44,  1.90s/it, loss=0.11]

In [None]:
# Run inference on the training images shown before.
# `model` and `batch_train` are created by earlier cells of this notebook.

# Put dropout / batch-norm layers into inference behaviour: a model that has
# just been fitted may still be in training mode, which would make the
# predictions depend on the batch statistics.
model.eval()

# Gradients are not needed here, so skip building the autograd graph
# (saves memory and time on the forward pass).
with torch.no_grad():
    logits = model(batch_train["image"].to(model.device))
    print(f"Shape of logits: {logits.shape}")
    # Softmax over dim=1 (assumed to be the class axis — TODO confirm).
    probs = torch.softmax(logits, dim=1)
    print(f"Shape of probs: {probs.shape}")
    # Index of the highest-probability entry along dim=1, moved to the CPU.
    prediction = torch.argmax(probs, dim=1).long().cpu()
    print(f"Shape of prediction: {prediction.shape}")

In [None]:
# Copy the tile size from the hyperparameters onto model_params itself, so it is
# also reachable as config.model_params.max_tile_size.
# NOTE(review): presumably the inference helper used in a later cell reads it
# from there — confirm against ml4floods.
config.model_params.max_tile_size = config.model_params.hyperparameters.max_tile_size
# Show the updated configuration as the cell output.
config

In [None]:
# os.environ["CUDA_VISIBLE_DEVICES"]=""

# import torch
import numpy as np
from ml4floods.models.utils import metrics
from ml4floods.models.model_setup import get_model_inference_function
import pandas as pd

# Wrap the trained model into an inference callable. Normalisation is disabled
# (apply_normalization=False) and a softmax activation is requested.
# (If the model has to be moved to the GPU first, call model.to("cuda").)
inference_function = get_model_inference_function(
    model,
    config,
    apply_normalization=False,
    activation="softmax",
)

# Metrics are computed on the validation split (a PyTorch DataLoader).
dl = dataset.val_dataloader()
print(dl.batch_size)

# Tip kept from the original notebook: reading the test dataset from a remote
# bucket can fail unless torch.set_num_threads(1) is called beforehand.

# Thresholds for the water class: three values near 0, a 0.05-spaced grid
# starting at 0.5 (np.arange stops before 0.96), and three values near 1.
thresholds_water = [
    0, 1e-3, 1e-2,
    *np.arange(0.5, .96, .05).tolist(),
    .99, .995, .999,
]

mets = metrics.compute_metrics(
    dl,
    inference_function,
    thresholds_water=thresholds_water,
    plot=False,
    convert_targets=False,
)

# Class names passed to the plotting helper.
label_names = ["land", "water", "cloud"]
metrics.plot_metrics(mets, label_names)

In [None]:
# Collect one file path per evaluated item. Datasets exposing `image_files`
# list the paths directly; otherwise each entry of `list_of_windows` carries
# the path in its `file_name` attribute.
if hasattr(dl.dataset, "image_files"):
    file_paths = dl.dataset.image_files
else:
    file_paths = [window.file_name for window in dl.dataset.list_of_windows]

# The grouping code is the part of the file name before the first underscore.
cems_code = [os.path.basename(path).split("_")[0] for path in file_paths]

# Aggregate the confusion matrices per code and derive IoU / recall per class.
iou_per_code = pd.DataFrame(
    metrics.group_confusion(
        mets["confusions"],
        cems_code,
        metrics.calculate_iou,
        label_names=[f"IoU_{name}" for name in ["land", "water", "cloud"]],
    )
)

recall_per_code = pd.DataFrame(
    metrics.group_confusion(
        mets["confusions"],
        cems_code,
        metrics.calculate_recall,
        label_names=[f"Recall_{name}" for name in ["land", "water", "cloud"]],
    )
)

# One row per code, recall and IoU columns side by side, scaled by 100.
join_data_per_code = (
    recall_per_code
    .merge(iou_per_code, on="code")
    .set_index("code")
    .mul(100)
)

mean_per_metric = join_data_per_code.mean(axis=0).to_dict()
print(f"Mean values across flood events: {mean_per_metric}")
join_data_per_code

In [None]:
import json

# Persist the trained weights (the model's state_dict) in the experiment folder.
torch.save(
    model.state_dict(),
    f"{experiment_path}/model_irirnir_worldflood_model_1_epoch_2_gamma_5_alpha_0_001.pt",
)

# Save the config file in experiment_path as well, serialised as JSON.
config_file_path = f"{experiment_path}/config_irirnir_worldflood_model_1_epoch_2_gamma_5_alpha_0_001.json"
with open(config_file_path, mode="w") as f:
    json.dump(config, f)

In [None]:
# Only runs when Weights & Biases logging was enabled for this experiment.
if setup_weights_and_biases:
    # Build the destination path once and reuse it for saving and uploading.
    wandb_model_path = os.path.join(
        wandb_logger.save_dir,
        'model_irirnir_worldflood_model_1_epoch_2_gamma_5_alpha_0_001.pt',
    )
    torch.save(model.state_dict(), wandb_model_path)
    # Copy weights to weights and biases server
    wandb.save(wandb_model_path)
    # Close the W&B run.
    wandb.finish()

In [None]:
# Run inference on the validation images shown before.
# `model` and `batch_val` are created by earlier cells of this notebook.

# Put dropout / batch-norm layers into inference behaviour: a model that has
# just been fitted may still be in training mode, which would make the
# predictions depend on the batch statistics.
model.eval()

# Gradients are not needed here, so skip building the autograd graph
# (saves memory and time on the forward pass).
with torch.no_grad():
    logits = model(batch_val["image"].to(model.device))
    print(f"Shape of logits: {logits.shape}")
    # Softmax over dim=1 (assumed to be the class axis — TODO confirm).
    probs = torch.softmax(logits, dim=1)
    print(f"Shape of probs: {probs.shape}")
    # Index of the highest-probability entry along dim=1, moved to the CPU.
    prediction = torch.argmax(probs, dim=1).long().cpu()
    print(f"Shape of prediction: {prediction.shape}")

In [None]:
# Validation images to display: batch indices n_image_start .. n_images - 1.
n_image_start = 7
n_images = 14
count = int(n_images - n_image_start)
shown = slice(n_image_start, n_images)

# Four rows of `count` panels each.
fig, axs = plt.subplots(4, count, figsize=(18, 14), tight_layout=True)

# Reload the plotting module before using it.
importlib.reload(flooding_model)

# Row 0: the images with the default band selection of plot_batch.
flooding_model.plot_batch(
    batch_val["image"][shown],
    channel_configuration="bgri",
    axs=axs[0],
    max_clip_val=3500.,
)
# Row 1: the same images with band B8 used for all three displayed channels.
flooding_model.plot_batch(
    batch_val["image"][shown],
    channel_configuration="bgri",
    bands_show=["B8", "B8", "B8"],
    axs=axs[1],
    max_clip_val=3500.,
)
# Row 2: first channel of the masks stored in the batch.
flooding_model.plot_batch_output_v1(
    batch_val["mask"][shown, 0],
    axs=axs[2],
    show_axis=True,
)
# Row 3: model predictions, shifted by one.
# NOTE(review): presumably the +1 aligns class indices with the mask encoding — confirm.
flooding_model.plot_batch_output_v1(
    prediction[shown] + 1,
    axs=axs[3],
    show_axis=True,
)

# Turn the grid off on every panel.
for ax in axs.flat:
    ax.grid(False)