In [None]:
# Mount Google Drive into the Colab runtime; later cells read the project
# sources and the dataset archives from paths under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Put the DeCLIP-custom checkout on the module search path so the project
# modules can be imported by the cells below.
# The membership check keeps this cell idempotent: re-running it does not
# pile up duplicate entries in sys.path.
import sys

PROJECT_ROOT = '/content/drive/MyDrive/Colab Notebooks/Licenta/DeCLIP-custom'
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [None]:
!pip install torchmetrics



In [None]:
import os
import time
import torch.multiprocessing
from copy import deepcopy
from tqdm import tqdm
import shutil
import numpy as np
from sklearn.metrics import average_precision_score, accuracy_score

from parameters import Parameters
from train.trainer import Trainer
from deepfake_datasets.datasets import get_dataloader
from train.early_stopping import EarlyStopping
from train.validate import validate_detection, validate_fully_supervised_localization
from utils.utils import compute_mean_iou, compute_mean_ap, compute_mean_f1, compute_mean_acc_detection, compute_mean_ap_detection, compute_batch_iou, compute_batch_localization_f1, compute_batch_ap, compute_accuracy_detection, compute_average_precision_detection


In [None]:
# Switch torch.multiprocessing to the 'file_system' sharing strategy.
# NOTE(review): presumably chosen to avoid "too many open files" errors when
# DataLoader worker processes hand tensors back to the main process — confirm
# it is still required for this runtime.
torch.multiprocessing.set_sharing_strategy('file_system')

In [None]:
import zipfile

# Archives live on Google Drive; they are unpacked onto the runtime's local
# disk (/content) because reading the images from there is faster than
# reading them through the Drive mount.
DRIVE_ZIP_DIR = '/content/drive/MyDrive/Colab Notebooks/Licenta/DeCLIP-main/datasets_zip'
LOCAL_DATA_ROOT = '/content/datasets/dolos_data/celebahq/'


def extract_zip(zip_path, dest_dir):
    """Unpack the archive at ``zip_path`` into ``dest_dir``.

    ``dest_dir`` is created first if it does not exist. Existing files with
    the same names are overwritten by the archive contents.

    Raises:
        FileNotFoundError: if ``zip_path`` does not exist.
        zipfile.BadZipFile: if ``zip_path`` is not a valid zip archive.
    """
    os.makedirs(dest_dir, exist_ok=True)
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(dest_dir)


new_dataset_name = 'train_all_4_datasets'

# Fake (manipulated) images and their masks go under <root>/fake/.
extract_zip(
    f'{DRIVE_ZIP_DIR}/combined_training_for_ood/{new_dataset_name}.zip',
    f'{LOCAL_DATA_ROOT}fake/',
)

# The real-image archive is extracted directly into the dataset root.
extract_zip(f'{DRIVE_ZIP_DIR}/real.zip', LOCAL_DATA_ROOT)

In [None]:
# Start from the project defaults and override only what this experiment needs.
params = Parameters()

# Experiment name and backbone are assigned before the output directories are
# created (create_output_dirs presumably derives its paths from them).
params.experiment_name = 'training_vit_classification'
params.arch = 'CLIP:ViT-L/14'
params.create_output_dirs()

# Point the fake-image / mask paths at the copy extracted on the local disk.
new_root_path = '/content/datasets/dolos_data/celebahq/'
new_dataset_name = 'train_all_4_datasets'
params.update_dolos_data_paths(new_root_path, new_dataset_name)

# The real-image splits are set by hand (not used until now).
for split in ('train', 'valid', 'test'):
    setattr(params, f'{split}_real_path', f'{new_root_path}real/{split}')

# Remaining experiment settings, applied in this order.
experiment_overrides = {
    'fix_backbone': True,            # freeze the backbone so only the decoder is trained
    'feature_layer': 'layer20',      # backbone layer whose features are used
    'decoder_type': 'linear',
    'batch_size': 64,
    'num_threads': 8,
    'num_iter': 30,                  # number of epochs
    'loss_type': 'cross-entropy',
    'task_type': 'classification',   # classification task
}
for attr_name, attr_value in experiment_overrides.items():
    setattr(params, attr_name, attr_value)

# Dump the final configuration so it is recorded in the notebook output.
for key, value in vars(params).items():
    print(f"{key}: {value}")

experiment_name: training_vit_classification
task_type: classification
data_label: train
arch: CLIP:ViT-L/14
fix_backbone: True
weight_decay: 0.0
batch_size: 64
num_threads: 8
init_type: normal
init_gain: 0.02
train_dataset: train_all_4_datasets
decoder_type: linear
feature_layer: layer20
early_stop_epochs: 5
optim: adam
beta1: 0.9
lr: 0.001
show_loss_freq: 50
num_iter: 30
data_root_path: /content/datasets/dolos_data/celebahq
train_fake_path: /content/datasets/dolos_data/celebahq/fake/train_all_4_datasets/images/train
valid_fake_path: /content/datasets/dolos_data/celebahq/fake/train_all_4_datasets/images/valid
test_fake_path: /content/datasets/dolos_data/celebahq/fake/train_all_4_datasets/images/test
train_masks_ground_truth_path: /content/datasets/dolos_data/celebahq/fake/train_all_4_datasets/masks/train
valid_masks_ground_truth_path: /content/datasets/dolos_data/celebahq/fake/train_all_4_datasets/masks/valid
test_masks_ground_truth_path: /content/datasets/dolos_data/celebahq/fake/tra

In [None]:
# get the model, along with its trainer
# The Trainer is built from the experiment parameters. The training cell drives
# it through set_input / optimize_parameters / save_model and reads the wrapped
# network from model_trainer.model.
model_trainer = Trainer(params)

In [None]:
# get the dataloaders
# Training loader: built from `params` as configured above.
train_loader = get_dataloader(params)

# Validation loader: same configuration, but on an independent deep copy whose
# data_label is switched to 'valid', so `params` itself is left untouched.
# NOTE(review): data_label presumably selects which split get_dataloader reads — confirm.
val_params = deepcopy(params)
val_params.data_label = 'valid'
val_loader = get_dataloader(val_params)

In [None]:
# Training loop.
# Each epoch: one pass over train_loader, training metrics (task dependent),
# validation, checkpointing when the validation metric improves, and early
# stopping. When early stopping fires, the trainer is asked to lower the
# learning rate; training continues only if it agrees, otherwise the loop ends.
torch.cuda.empty_cache()
# model_trainer.compute_pos_weight_per_dataset(train_loader)

# delta values handed to EarlyStopping: the first is used from the start, the
# second for the fresh EarlyStopping created after a learning-rate reduction.
INITIAL_EARLY_STOP_DELTA = -0.001
RESTART_EARLY_STOP_DELTA = -0.002

early_stopping = EarlyStopping(patience=params.early_stop_epochs, verbose=True, delta=INITIAL_EARLY_STOP_DELTA)
best_metric = 0.0  # IoU for localization, AP for detection / classification
print('Length of training set:', len(train_loader.dataset))
print('Length of validation set:', len(val_loader.dataset))
start_time = time.time()
for epoch in tqdm(range(params.num_iter)):
    print('Epoch:', epoch)

    # Accumulate the loss of EVERY batch so the reported average is the mean
    # per-batch loss of the epoch (not a sparse sample divided by the number
    # of images).
    epoch_loss = 0.0
    num_batches = 0
    for data in train_loader:
        model_trainer.total_steps += 1

        model_trainer.set_input(data)
        model_trainer.optimize_parameters()

        batch_loss = model_trainer.loss.item()
        epoch_loss += batch_loss
        num_batches += 1

        if model_trainer.total_steps % params.show_loss_freq == 0:
            # "Iter time" is wall-clock time since the loop started divided by
            # the total step count, so it also includes time spent validating.
            print(f'Train Loss: {batch_loss:.4f} at step {model_trainer.total_steps} \t Iter time: {(time.time() - start_time) / model_trainer.total_steps:.2f}')

    # max(..., 1) guards against an empty loader.
    epoch_loss /= max(num_batches, 1)
    print(f'Average Train Loss: {epoch_loss:.4f}')

    # compute training metrics
    # NOTE(review): training metrics are only computed for the localization and
    # 'detection' tasks, while validation below also handles 'classification'.
    # Confirm whether the 'detection' branch should cover 'classification' too.
    if params.task_type == 'fully_supervised_localization':
        compute_mean_iou(model_trainer.ious, verbose=True, extra_text=f'Train epoch {epoch} ')
        model_trainer.ious = []

        compute_mean_f1(model_trainer.f1_best, model_trainer.f1_fixed, verbose=True, extra_text=f'Train epoch {epoch} ')
        model_trainer.f1_best = []
        model_trainer.f1_fixed = []

        compute_mean_ap(model_trainer.ap, verbose=True, extra_text=f'Train epoch {epoch} ')
        model_trainer.ap = []

    elif params.task_type == 'detection':
        model_trainer.format_output_detection()

        logits_np = model_trainer.logits.detach().cpu().numpy()
        labels_np = model_trainer.labels.detach().cpu().numpy().astype(int)

        # One-hot encode the integer labels so AP can be macro-averaged over
        # the classes (one column of logits per class).
        num_classes = logits_np.shape[1]
        labels_one_hot = np.eye(num_classes)[labels_np]

        train_ap = average_precision_score(labels_one_hot, logits_np, average='macro')
        train_acc = accuracy_score(labels_np, np.argmax(logits_np, axis=1))

        print(f'Train epoch {epoch} Mean ACC: {train_acc:.4f}')
        print(f'Train epoch {epoch} Mean AP: {train_ap:.4f}')

        # reset the per-epoch accumulators on the trainer
        model_trainer.logits = []
        model_trainer.labels = []

    # validate the model
    print('Validation:')
    if params.task_type == 'fully_supervised_localization':
        ious, f1_best, f1_fixed, mean_ap, _ = validate_fully_supervised_localization(model_trainer.model, val_loader, params.train_dataset)

        # compute metrics
        mean_iou = compute_mean_iou(ious, verbose=True, extra_text=f'Validation at epoch {epoch} ')

        mean_f1_best, mean_f1_fixed = compute_mean_f1(f1_best, f1_fixed, verbose=True, extra_text=f'Validation at epoch {epoch} ')

        mean_ap = compute_mean_ap(mean_ap, verbose=True, extra_text=f'Validation at epoch {epoch} ')

        # save the model if the mean iou is improved
        if mean_iou > best_metric:
            best_metric = mean_iou
            model_trainer.save_model(f'best_localization_model_iou_{mean_iou:.4f}.pth')
            print(f'Best model saved at epoch {epoch}!')

        # check for early stopping
        early_stopping(mean_iou)

    elif params.task_type in ('detection', 'classification'):
        ap, acc, _ = validate_detection(model_trainer.model, val_loader)

        print(f'Validation at epoch {epoch} - AP: {ap:.4f}, Acc: {acc:.4f}')

        # save the model if the mean ap is improved
        if ap > best_metric:
            best_metric = ap
            model_trainer.save_model(f'best_{params.task_type}_model_ap_{ap:.4f}.pth')
            print(f'Best model saved at epoch {epoch}!')

        # check for early stopping
        early_stopping(ap)

    # check if early stopping is triggered
    if early_stopping.early_stop:
        print("Early stopping triggered")
        continue_training = model_trainer.adjust_learning_rate()
        if continue_training:
            print("Continuing training with a learning rate reduced by a factor of 10")
            # restart the patience counter with a fresh EarlyStopping and the restart delta
            early_stopping = EarlyStopping(patience=params.early_stop_epochs, verbose=True, delta=RESTART_EARLY_STOP_DELTA)
        else:
            print(f"Early stopping training at epoch {epoch}")
            break
    print()
    torch.cuda.empty_cache()

Length of training set: 21000
Length of validation set: 2100


  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 0
Train Loss: 1.0043 at step 50 	 Iter time: 1.51
Train Loss: 1.0092 at step 100 	 Iter time: 1.53
Train Loss: 0.7276 at step 150 	 Iter time: 1.54
Train Loss: 0.7622 at step 200 	 Iter time: 1.55
Train Loss: 0.7871 at step 250 	 Iter time: 1.55
Train Loss: 0.5891 at step 300 	 Iter time: 1.55
Average Train Loss: 0.0002
Validation:
Length of dataset:  2100



  0%|          | 0/33 [00:00<?, ?it/s][A
  3%|▎         | 1/33 [00:02<01:25,  2.67s/it][A
  9%|▉         | 3/33 [00:04<00:38,  1.27s/it][A
 12%|█▏        | 4/33 [00:05<00:39,  1.36s/it][A
 15%|█▌        | 5/33 [00:07<00:39,  1.41s/it][A
 18%|█▊        | 6/33 [00:08<00:39,  1.45s/it][A
 21%|██        | 7/33 [00:10<00:38,  1.48s/it][A
 24%|██▍       | 8/33 [00:11<00:37,  1.49s/it][A
 27%|██▋       | 9/33 [00:13<00:36,  1.50s/it][A
 30%|███       | 10/33 [00:14<00:34,  1.51s/it][A
 33%|███▎      | 11/33 [00:16<00:33,  1.52s/it][A
 36%|███▋      | 12/33 [00:18<00:32,  1.53s/it][A
 39%|███▉      | 13/33 [00:19<00:30,  1.54s/it][A
 42%|████▏     | 14/33 [00:21<00:29,  1.55s/it][A
 45%|████▌     | 15/33 [00:22<00:27,  1.55s/it][A
 48%|████▊     | 16/33 [00:24<00:26,  1.56s/it][A
 52%|█████▏    | 17/33 [00:25<00:24,  1.56s/it][A
 55%|█████▍    | 18/33 [00:27<00:23,  1.56s/it][A
 58%|█████▊    | 19/33 [00:28<00:21,  1.57s/it][A
 61%|██████    | 20/33 [00:30<00:20,  1.57s/it]

Validation at epoch 0 - AP: 0.7880, Acc: 0.7905


  3%|▎         | 1/30 [09:26<4:34:01, 566.95s/it]

Best model saved at epoch 0!

Epoch: 1
Train Loss: 0.7520 at step 350 	 Iter time: 1.71
Train Loss: 0.5396 at step 400 	 Iter time: 1.70
Train Loss: 0.4902 at step 450 	 Iter time: 1.68
Train Loss: 0.5648 at step 500 	 Iter time: 1.67
Train Loss: 0.6150 at step 550 	 Iter time: 1.66
Train Loss: 0.4984 at step 600 	 Iter time: 1.65
Train Loss: 0.4652 at step 650 	 Iter time: 1.64
Average Train Loss: 0.0002
Validation:
Length of dataset:  2100



  0%|          | 0/33 [00:00<?, ?it/s][A
  3%|▎         | 1/33 [00:00<00:18,  1.78it/s][A
  6%|▌         | 2/33 [00:02<00:34,  1.12s/it][A
  9%|▉         | 3/33 [00:03<00:39,  1.32s/it][A
 12%|█▏        | 4/33 [00:05<00:40,  1.41s/it][A
 15%|█▌        | 5/33 [00:06<00:41,  1.47s/it][A
 18%|█▊        | 6/33 [00:08<00:40,  1.50s/it][A
 21%|██        | 7/33 [00:09<00:39,  1.52s/it][A
 24%|██▍       | 8/33 [00:11<00:38,  1.53s/it][A
 27%|██▋       | 9/33 [00:12<00:36,  1.54s/it][A
 30%|███       | 10/33 [00:14<00:35,  1.54s/it][A
 33%|███▎      | 11/33 [00:16<00:34,  1.55s/it][A
 36%|███▋      | 12/33 [00:17<00:32,  1.55s/it][A
 39%|███▉      | 13/33 [00:19<00:31,  1.55s/it][A
 42%|████▏     | 14/33 [00:20<00:29,  1.56s/it][A
 45%|████▌     | 15/33 [00:22<00:28,  1.56s/it][A
 48%|████▊     | 16/33 [00:23<00:26,  1.56s/it][A
 52%|█████▏    | 17/33 [00:25<00:24,  1.56s/it][A
 55%|█████▍    | 18/33 [00:27<00:23,  1.56s/it][A
 58%|█████▊    | 19/33 [00:28<00:21,  1.56s/it]

Validation at epoch 1 - AP: 0.8071, Acc: 0.8262


  7%|▋         | 2/30 [18:53<4:24:27, 566.71s/it]

Best model saved at epoch 1!

Epoch: 2
Train Loss: 0.6208 at step 700 	 Iter time: 1.71
Train Loss: 0.5702 at step 750 	 Iter time: 1.70
Train Loss: 0.5111 at step 800 	 Iter time: 1.69
Train Loss: 0.5788 at step 850 	 Iter time: 1.69
Train Loss: 0.4894 at step 900 	 Iter time: 1.68
Train Loss: 0.4926 at step 950 	 Iter time: 1.67
Average Train Loss: 0.0002
Validation:
Length of dataset:  2100



  0%|          | 0/33 [00:00<?, ?it/s][A
  3%|▎         | 1/33 [00:00<00:17,  1.79it/s][A
  6%|▌         | 2/33 [00:02<00:34,  1.10s/it][A
  9%|▉         | 3/33 [00:03<00:38,  1.30s/it][A
 12%|█▏        | 4/33 [00:05<00:40,  1.39s/it][A
 15%|█▌        | 5/33 [00:06<00:40,  1.44s/it][A
 18%|█▊        | 6/33 [00:08<00:39,  1.48s/it][A
 21%|██        | 7/33 [00:09<00:39,  1.50s/it][A
 24%|██▍       | 8/33 [00:11<00:38,  1.52s/it][A
 27%|██▋       | 9/33 [00:12<00:36,  1.53s/it][A
 30%|███       | 10/33 [00:14<00:35,  1.54s/it][A
 33%|███▎      | 11/33 [00:15<00:34,  1.55s/it][A
 36%|███▋      | 12/33 [00:17<00:32,  1.55s/it][A
 39%|███▉      | 13/33 [00:19<00:31,  1.55s/it][A
 42%|████▏     | 14/33 [00:20<00:29,  1.56s/it][A
 45%|████▌     | 15/33 [00:22<00:28,  1.56s/it][A
 48%|████▊     | 16/33 [00:23<00:26,  1.56s/it][A
 52%|█████▏    | 17/33 [00:25<00:24,  1.56s/it][A
 55%|█████▍    | 18/33 [00:26<00:23,  1.56s/it][A
 58%|█████▊    | 19/33 [00:28<00:21,  1.56s/it]

Validation at epoch 2 - AP: 0.8183, Acc: 0.8329


 10%|█         | 3/30 [28:23<4:15:44, 568.33s/it]

Best model saved at epoch 2!

Epoch: 3
Train Loss: 0.4503 at step 1000 	 Iter time: 1.72
Train Loss: 0.3483 at step 1050 	 Iter time: 1.72
Train Loss: 0.4367 at step 1100 	 Iter time: 1.71
Train Loss: 0.4218 at step 1150 	 Iter time: 1.70
Train Loss: 0.3778 at step 1200 	 Iter time: 1.70
Train Loss: 0.4024 at step 1250 	 Iter time: 1.69
Train Loss: 0.5044 at step 1300 	 Iter time: 1.69
Average Train Loss: 0.0001
Validation:
Length of dataset:  2100



  0%|          | 0/33 [00:00<?, ?it/s][A
  3%|▎         | 1/33 [00:00<00:16,  1.93it/s][A
  6%|▌         | 2/33 [00:02<00:33,  1.09s/it][A
  9%|▉         | 3/33 [00:03<00:38,  1.30s/it][A
 12%|█▏        | 4/33 [00:05<00:40,  1.40s/it][A
 15%|█▌        | 5/33 [00:06<00:40,  1.46s/it][A
 18%|█▊        | 6/33 [00:08<00:40,  1.49s/it][A
 21%|██        | 7/33 [00:09<00:39,  1.51s/it][A
 24%|██▍       | 8/33 [00:11<00:38,  1.53s/it][A
 27%|██▋       | 9/33 [00:12<00:36,  1.53s/it][A
 30%|███       | 10/33 [00:14<00:35,  1.54s/it][A
 33%|███▎      | 11/33 [00:15<00:33,  1.54s/it][A
 36%|███▋      | 12/33 [00:17<00:32,  1.55s/it][A
 39%|███▉      | 13/33 [00:19<00:31,  1.55s/it][A
 42%|████▏     | 14/33 [00:20<00:29,  1.55s/it][A
 45%|████▌     | 15/33 [00:22<00:27,  1.55s/it][A
 48%|████▊     | 16/33 [00:23<00:26,  1.56s/it][A
 52%|█████▏    | 17/33 [00:25<00:24,  1.56s/it][A
 55%|█████▍    | 18/33 [00:26<00:23,  1.56s/it][A
 58%|█████▊    | 19/33 [00:28<00:21,  1.56s/it]

Validation at epoch 3 - AP: 0.8222, Acc: 0.8467


 13%|█▎        | 4/30 [37:50<4:06:00, 567.71s/it]

Best model saved at epoch 3!

Epoch: 4
Train Loss: 0.3244 at step 1350 	 Iter time: 1.72
Train Loss: 0.4213 at step 1400 	 Iter time: 1.72
Train Loss: 0.4329 at step 1450 	 Iter time: 1.71
Train Loss: 0.3330 at step 1500 	 Iter time: 1.71
Train Loss: 0.3401 at step 1550 	 Iter time: 1.70
Train Loss: 0.3590 at step 1600 	 Iter time: 1.70
Average Train Loss: 0.0001
Validation:
Length of dataset:  2100



  0%|          | 0/33 [00:00<?, ?it/s][A
  3%|▎         | 1/33 [00:00<00:11,  2.79it/s][A
  6%|▌         | 2/33 [00:01<00:32,  1.04s/it][A
  9%|▉         | 3/33 [00:03<00:38,  1.27s/it][A
 12%|█▏        | 4/33 [00:04<00:40,  1.38s/it][A
 15%|█▌        | 5/33 [00:06<00:40,  1.45s/it][A
 18%|█▊        | 6/33 [00:08<00:40,  1.48s/it][A
 21%|██        | 7/33 [00:09<00:39,  1.51s/it][A
 24%|██▍       | 8/33 [00:11<00:38,  1.52s/it][A
 27%|██▋       | 9/33 [00:12<00:36,  1.53s/it][A
 30%|███       | 10/33 [00:14<00:35,  1.54s/it][A
 33%|███▎      | 11/33 [00:15<00:33,  1.55s/it][A
 36%|███▋      | 12/33 [00:17<00:32,  1.55s/it][A
 39%|███▉      | 13/33 [00:18<00:31,  1.55s/it][A
 42%|████▏     | 14/33 [00:20<00:29,  1.55s/it][A
 45%|████▌     | 15/33 [00:22<00:27,  1.55s/it][A
 48%|████▊     | 16/33 [00:23<00:26,  1.56s/it][A
 52%|█████▏    | 17/33 [00:25<00:24,  1.56s/it][A
 55%|█████▍    | 18/33 [00:26<00:23,  1.56s/it][A
 58%|█████▊    | 19/33 [00:28<00:21,  1.56s/it]

Validation at epoch 4 - AP: 0.8235, Acc: 0.8471


 17%|█▋        | 5/30 [47:17<3:56:22, 567.29s/it]

Best model saved at epoch 4!

Epoch: 5
Train Loss: 0.3676 at step 1650 	 Iter time: 1.72
Train Loss: 0.4874 at step 1700 	 Iter time: 1.72
Train Loss: 0.4023 at step 1750 	 Iter time: 1.71
Train Loss: 0.3178 at step 1800 	 Iter time: 1.71
Train Loss: 0.4435 at step 1850 	 Iter time: 1.71
Train Loss: 0.4995 at step 1900 	 Iter time: 1.70
Train Loss: 0.4484 at step 1950 	 Iter time: 1.70
Average Train Loss: 0.0001
Validation:
Length of dataset:  2100



  0%|          | 0/33 [00:00<?, ?it/s][A
  3%|▎         | 1/33 [00:00<00:12,  2.65it/s][A
  6%|▌         | 2/33 [00:01<00:32,  1.05s/it][A
  9%|▉         | 3/33 [00:03<00:38,  1.28s/it][A
 12%|█▏        | 4/33 [00:04<00:40,  1.39s/it][A
 15%|█▌        | 5/33 [00:06<00:40,  1.45s/it][A
 18%|█▊        | 6/33 [00:08<00:40,  1.48s/it][A
 21%|██        | 7/33 [00:09<00:39,  1.51s/it][A
 24%|██▍       | 8/33 [00:11<00:38,  1.52s/it][A
 27%|██▋       | 9/33 [00:12<00:36,  1.54s/it][A
 30%|███       | 10/33 [00:14<00:35,  1.54s/it][A
 33%|███▎      | 11/33 [00:15<00:34,  1.55s/it][A
 36%|███▋      | 12/33 [00:17<00:32,  1.55s/it][A
 39%|███▉      | 13/33 [00:19<00:31,  1.55s/it][A
 42%|████▏     | 14/33 [00:20<00:29,  1.56s/it][A
 45%|████▌     | 15/33 [00:22<00:28,  1.56s/it][A
 48%|████▊     | 16/33 [00:23<00:26,  1.56s/it][A
 52%|█████▏    | 17/33 [00:25<00:24,  1.56s/it][A
 55%|█████▍    | 18/33 [00:26<00:23,  1.56s/it][A
 58%|█████▊    | 19/33 [00:28<00:21,  1.56s/it]

Validation at epoch 5 - AP: 0.8258, Acc: 0.8519


 20%|██        | 6/30 [56:42<3:46:43, 566.82s/it]

Best model saved at epoch 5!

Epoch: 6
Train Loss: 0.3038 at step 2000 	 Iter time: 1.72
Train Loss: 0.3842 at step 2050 	 Iter time: 1.72
Train Loss: 0.4018 at step 2100 	 Iter time: 1.71
Train Loss: 0.4564 at step 2150 	 Iter time: 1.71
Train Loss: 0.3539 at step 2200 	 Iter time: 1.71
Train Loss: 0.4500 at step 2250 	 Iter time: 1.70
Train Loss: 0.4037 at step 2300 	 Iter time: 1.70
Average Train Loss: 0.0001
Validation:
Length of dataset:  2100



  0%|          | 0/33 [00:00<?, ?it/s][A
  3%|▎         | 1/33 [00:00<00:13,  2.40it/s][A
  6%|▌         | 2/33 [00:01<00:32,  1.05s/it][A
  9%|▉         | 3/33 [00:03<00:37,  1.26s/it][A
 12%|█▏        | 4/33 [00:04<00:39,  1.37s/it][A
 15%|█▌        | 5/33 [00:06<00:40,  1.43s/it][A
 18%|█▊        | 6/33 [00:08<00:39,  1.48s/it][A
 21%|██        | 7/33 [00:09<00:39,  1.50s/it][A
 24%|██▍       | 8/33 [00:11<00:38,  1.52s/it][A
 27%|██▋       | 9/33 [00:12<00:36,  1.53s/it][A
 30%|███       | 10/33 [00:14<00:35,  1.54s/it][A
 33%|███▎      | 11/33 [00:15<00:34,  1.55s/it][A
 36%|███▋      | 12/33 [00:17<00:32,  1.55s/it][A
 39%|███▉      | 13/33 [00:18<00:31,  1.55s/it][A
 42%|████▏     | 14/33 [00:20<00:29,  1.55s/it][A
 45%|████▌     | 15/33 [00:22<00:28,  1.56s/it][A
 48%|████▊     | 16/33 [00:23<00:26,  1.56s/it][A
 52%|█████▏    | 17/33 [00:25<00:24,  1.56s/it][A
 55%|█████▍    | 18/33 [00:26<00:23,  1.56s/it][A
 58%|█████▊    | 19/33 [00:28<00:21,  1.56s/it]

Validation at epoch 6 - AP: 0.8267, Acc: 0.8486


 23%|██▎       | 7/30 [1:06:11<3:37:26, 567.23s/it]

Best model saved at epoch 6!
Early Stopping counter: 1 out of 5

Epoch: 7
Train Loss: 0.3746 at step 2350 	 Iter time: 1.72
Train Loss: 0.3648 at step 2400 	 Iter time: 1.72
Train Loss: 0.3302 at step 2450 	 Iter time: 1.71
Train Loss: 0.3280 at step 2500 	 Iter time: 1.71
Train Loss: 0.3135 at step 2550 	 Iter time: 1.71
Train Loss: 0.5693 at step 2600 	 Iter time: 1.71
Average Train Loss: 0.0001
Validation:
Length of dataset:  2100



  0%|          | 0/33 [00:00<?, ?it/s][A
  3%|▎         | 1/33 [00:00<00:12,  2.59it/s][A
  6%|▌         | 2/33 [00:01<00:32,  1.04s/it][A
  9%|▉         | 3/33 [00:03<00:37,  1.26s/it][A
 12%|█▏        | 4/33 [00:04<00:39,  1.36s/it][A
 15%|█▌        | 5/33 [00:06<00:40,  1.44s/it][A
 18%|█▊        | 6/33 [00:08<00:39,  1.48s/it][A
 21%|██        | 7/33 [00:09<00:39,  1.50s/it][A
 24%|██▍       | 8/33 [00:11<00:38,  1.52s/it][A
 27%|██▋       | 9/33 [00:12<00:36,  1.53s/it][A
 30%|███       | 10/33 [00:14<00:35,  1.54s/it][A
 33%|███▎      | 11/33 [00:15<00:34,  1.55s/it][A
 36%|███▋      | 12/33 [00:17<00:32,  1.55s/it][A
 39%|███▉      | 13/33 [00:18<00:31,  1.55s/it][A
 42%|████▏     | 14/33 [00:20<00:29,  1.56s/it][A
 45%|████▌     | 15/33 [00:22<00:28,  1.56s/it][A
 48%|████▊     | 16/33 [00:23<00:26,  1.56s/it][A
 52%|█████▏    | 17/33 [00:25<00:24,  1.56s/it][A
 55%|█████▍    | 18/33 [00:26<00:23,  1.56s/it][A
 58%|█████▊    | 19/33 [00:28<00:21,  1.56s/it]

Validation at epoch 7 - AP: 0.8253, Acc: 0.8548
Early Stopping counter: 2 out of 5

Epoch: 8
Train Loss: 0.3661 at step 2650 	 Iter time: 1.72
Train Loss: 0.3234 at step 2700 	 Iter time: 1.72
Train Loss: 0.3154 at step 2750 	 Iter time: 1.72
Train Loss: 0.3468 at step 2800 	 Iter time: 1.71
Train Loss: 0.3685 at step 2850 	 Iter time: 1.71
Train Loss: 0.2406 at step 2900 	 Iter time: 1.71
Train Loss: 0.4241 at step 2950 	 Iter time: 1.71
Average Train Loss: 0.0001
Validation:
Length of dataset:  2100



  0%|          | 0/33 [00:00<?, ?it/s][A
  3%|▎         | 1/33 [00:00<00:16,  1.89it/s][A
  6%|▌         | 2/33 [00:02<00:34,  1.10s/it][A
  9%|▉         | 3/33 [00:03<00:38,  1.29s/it][A
 12%|█▏        | 4/33 [00:05<00:40,  1.39s/it][A
 15%|█▌        | 5/33 [00:06<00:40,  1.44s/it][A
 18%|█▊        | 6/33 [00:08<00:39,  1.47s/it][A
 21%|██        | 7/33 [00:09<00:38,  1.49s/it][A
 24%|██▍       | 8/33 [00:11<00:37,  1.50s/it][A
 27%|██▋       | 9/33 [00:12<00:36,  1.51s/it][A
 30%|███       | 10/33 [00:14<00:35,  1.53s/it][A
 33%|███▎      | 11/33 [00:15<00:33,  1.54s/it][A
 36%|███▋      | 12/33 [00:17<00:32,  1.54s/it][A
 39%|███▉      | 13/33 [00:18<00:30,  1.55s/it][A
 42%|████▏     | 14/33 [00:20<00:29,  1.55s/it][A
 45%|████▌     | 15/33 [00:22<00:27,  1.55s/it][A
 48%|████▊     | 16/33 [00:23<00:26,  1.56s/it][A
 52%|█████▏    | 17/33 [00:25<00:24,  1.56s/it][A
 55%|█████▍    | 18/33 [00:26<00:23,  1.56s/it][A
 58%|█████▊    | 19/33 [00:28<00:21,  1.56s/it]

Validation at epoch 8 - AP: 0.8310, Acc: 0.8533


 30%|███       | 9/30 [1:25:01<3:18:12, 566.32s/it]

Best model saved at epoch 8!

Epoch: 9
Train Loss: 0.4324 at step 3000 	 Iter time: 1.72
Train Loss: 0.4970 at step 3050 	 Iter time: 1.72
Train Loss: 0.3096 at step 3100 	 Iter time: 1.72
Train Loss: 0.4041 at step 3150 	 Iter time: 1.71
Train Loss: 0.3784 at step 3200 	 Iter time: 1.71
Train Loss: 0.3443 at step 3250 	 Iter time: 1.71
Average Train Loss: 0.0001
Validation:
Length of dataset:  2100



  0%|          | 0/33 [00:00<?, ?it/s][A
  3%|▎         | 1/33 [00:00<00:11,  2.68it/s][A
  6%|▌         | 2/33 [00:01<00:32,  1.04s/it][A
  9%|▉         | 3/33 [00:03<00:38,  1.28s/it][A
 12%|█▏        | 4/33 [00:04<00:40,  1.39s/it][A
 15%|█▌        | 5/33 [00:06<00:40,  1.45s/it][A
 18%|█▊        | 6/33 [00:08<00:40,  1.49s/it][A
 21%|██        | 7/33 [00:09<00:39,  1.51s/it][A
 24%|██▍       | 8/33 [00:11<00:38,  1.52s/it][A
 27%|██▋       | 9/33 [00:12<00:36,  1.54s/it][A
 30%|███       | 10/33 [00:14<00:35,  1.54s/it][A
 33%|███▎      | 11/33 [00:15<00:34,  1.55s/it][A
 36%|███▋      | 12/33 [00:17<00:32,  1.55s/it][A
 39%|███▉      | 13/33 [00:19<00:31,  1.55s/it][A
 42%|████▏     | 14/33 [00:20<00:29,  1.55s/it][A
 45%|████▌     | 15/33 [00:22<00:27,  1.56s/it][A
 48%|████▊     | 16/33 [00:23<00:26,  1.56s/it][A
 52%|█████▏    | 17/33 [00:25<00:24,  1.56s/it][A
 55%|█████▍    | 18/33 [00:26<00:23,  1.56s/it][A
 58%|█████▊    | 19/33 [00:28<00:21,  1.56s/it]

Validation at epoch 9 - AP: 0.8277, Acc: 0.8533
Early Stopping counter: 1 out of 5

Epoch: 10
Train Loss: 0.4174 at step 3300 	 Iter time: 1.72
Train Loss: 0.4185 at step 3350 	 Iter time: 1.72
Train Loss: 0.2847 at step 3400 	 Iter time: 1.72
Train Loss: 0.4342 at step 3450 	 Iter time: 1.71
Train Loss: 0.2628 at step 3500 	 Iter time: 1.71
Train Loss: 0.3486 at step 3550 	 Iter time: 1.71
Train Loss: 0.4646 at step 3600 	 Iter time: 1.71
Average Train Loss: 0.0001
Validation:
Length of dataset:  2100



  0%|          | 0/33 [00:00<?, ?it/s][A
  3%|▎         | 1/33 [00:00<00:17,  1.87it/s][A
  6%|▌         | 2/33 [00:02<00:34,  1.11s/it][A
  9%|▉         | 3/33 [00:03<00:39,  1.31s/it][A
 12%|█▏        | 4/33 [00:05<00:40,  1.41s/it][A
 15%|█▌        | 5/33 [00:06<00:41,  1.47s/it][A
 18%|█▊        | 6/33 [00:08<00:40,  1.50s/it][A
 21%|██        | 7/33 [00:09<00:39,  1.52s/it][A
 24%|██▍       | 8/33 [00:11<00:38,  1.53s/it][A
 27%|██▋       | 9/33 [00:12<00:37,  1.54s/it][A
 30%|███       | 10/33 [00:14<00:35,  1.55s/it][A
 33%|███▎      | 11/33 [00:16<00:34,  1.56s/it][A
 36%|███▋      | 12/33 [00:17<00:32,  1.56s/it][A
 39%|███▉      | 13/33 [00:19<00:31,  1.56s/it][A
 42%|████▏     | 14/33 [00:20<00:29,  1.56s/it][A
 45%|████▌     | 15/33 [00:22<00:28,  1.56s/it][A
 48%|████▊     | 16/33 [00:23<00:26,  1.56s/it][A
 52%|█████▏    | 17/33 [00:25<00:25,  1.57s/it][A
 55%|█████▍    | 18/33 [00:27<00:23,  1.57s/it][A
 58%|█████▊    | 19/33 [00:28<00:21,  1.57s/it]

Validation at epoch 10 - AP: 0.8267, Acc: 0.8552
Early Stopping counter: 2 out of 5

Epoch: 11
Train Loss: 0.2834 at step 3650 	 Iter time: 1.72
Train Loss: 0.5496 at step 3700 	 Iter time: 1.72
Train Loss: 0.4420 at step 3750 	 Iter time: 1.72
Train Loss: 0.2415 at step 3800 	 Iter time: 1.71
Train Loss: 0.4117 at step 3850 	 Iter time: 1.71
Train Loss: 0.3132 at step 3900 	 Iter time: 1.71
Average Train Loss: 0.0001
Validation:
Length of dataset:  2100



  0%|          | 0/33 [00:00<?, ?it/s][A
  3%|▎         | 1/33 [00:00<00:13,  2.42it/s][A
  6%|▌         | 2/33 [00:01<00:32,  1.05s/it][A
  9%|▉         | 3/33 [00:03<00:38,  1.27s/it][A
 12%|█▏        | 4/33 [00:04<00:39,  1.37s/it][A
 15%|█▌        | 5/33 [00:06<00:39,  1.43s/it][A
 18%|█▊        | 6/33 [00:08<00:39,  1.47s/it][A
 21%|██        | 7/33 [00:09<00:39,  1.50s/it][A
 24%|██▍       | 8/33 [00:11<00:38,  1.52s/it][A
 27%|██▋       | 9/33 [00:12<00:36,  1.53s/it][A
 30%|███       | 10/33 [00:14<00:35,  1.54s/it][A
 33%|███▎      | 11/33 [00:15<00:34,  1.55s/it][A
 36%|███▋      | 12/33 [00:17<00:32,  1.55s/it][A
 39%|███▉      | 13/33 [00:18<00:31,  1.55s/it][A
 42%|████▏     | 14/33 [00:20<00:29,  1.55s/it][A
 45%|████▌     | 15/33 [00:22<00:28,  1.56s/it][A
 48%|████▊     | 16/33 [00:23<00:26,  1.56s/it][A
 52%|█████▏    | 17/33 [00:25<00:24,  1.56s/it][A
 55%|█████▍    | 18/33 [00:26<00:23,  1.56s/it][A
 58%|█████▊    | 19/33 [00:28<00:21,  1.56s/it]

Validation at epoch 11 - AP: 0.8303, Acc: 0.8514
Early Stopping counter: 3 out of 5

Epoch: 12
Train Loss: 0.2947 at step 3950 	 Iter time: 1.72
Train Loss: 0.4901 at step 4000 	 Iter time: 1.72
Train Loss: 0.5468 at step 4050 	 Iter time: 1.72
Train Loss: 0.4226 at step 4100 	 Iter time: 1.71
Train Loss: 0.2086 at step 4150 	 Iter time: 1.71
Train Loss: 0.3389 at step 4200 	 Iter time: 1.71
Train Loss: 0.2874 at step 4250 	 Iter time: 1.71
Average Train Loss: 0.0001
Validation:
Length of dataset:  2100



  0%|          | 0/33 [00:00<?, ?it/s][A
  3%|▎         | 1/33 [00:00<00:16,  1.99it/s][A
  6%|▌         | 2/33 [00:01<00:33,  1.07s/it][A
  9%|▉         | 3/33 [00:03<00:38,  1.28s/it][A
 12%|█▏        | 4/33 [00:05<00:39,  1.38s/it][A
 15%|█▌        | 5/33 [00:06<00:40,  1.43s/it][A
 18%|█▊        | 6/33 [00:08<00:39,  1.46s/it][A
 21%|██        | 7/33 [00:09<00:38,  1.48s/it][A
 24%|██▍       | 8/33 [00:11<00:37,  1.50s/it][A
 27%|██▋       | 9/33 [00:12<00:36,  1.51s/it][A
 30%|███       | 10/33 [00:14<00:34,  1.51s/it][A
 33%|███▎      | 11/33 [00:15<00:33,  1.52s/it][A
 36%|███▋      | 12/33 [00:17<00:31,  1.52s/it][A
 39%|███▉      | 13/33 [00:18<00:30,  1.52s/it][A
 42%|████▏     | 14/33 [00:20<00:28,  1.52s/it][A
 45%|████▌     | 15/33 [00:21<00:27,  1.52s/it][A
 48%|████▊     | 16/33 [00:23<00:25,  1.53s/it][A
 52%|█████▏    | 17/33 [00:24<00:24,  1.53s/it][A
 55%|█████▍    | 18/33 [00:26<00:22,  1.53s/it][A
 58%|█████▊    | 19/33 [00:27<00:21,  1.54s/it]

Validation at epoch 12 - AP: 0.8284, Acc: 0.8552
Early Stopping counter: 4 out of 5

Epoch: 13
Train Loss: 0.3590 at step 4300 	 Iter time: 1.72
Train Loss: 0.2990 at step 4350 	 Iter time: 1.72
Train Loss: 0.5172 at step 4400 	 Iter time: 1.72
Train Loss: 0.3480 at step 4450 	 Iter time: 1.71
Train Loss: 0.3467 at step 4500 	 Iter time: 1.71
Train Loss: 0.2746 at step 4550 	 Iter time: 1.71
Train Loss: 0.5319 at step 4600 	 Iter time: 1.71
Average Train Loss: 0.0001
Validation:
Length of dataset:  2100



  0%|          | 0/33 [00:00<?, ?it/s][A
  3%|▎         | 1/33 [00:00<00:16,  1.98it/s][A
  6%|▌         | 2/33 [00:02<00:34,  1.10s/it][A
  9%|▉         | 3/33 [00:03<00:39,  1.30s/it][A
 12%|█▏        | 4/33 [00:05<00:40,  1.41s/it][A
 15%|█▌        | 5/33 [00:06<00:40,  1.46s/it][A
 18%|█▊        | 6/33 [00:08<00:40,  1.49s/it][A
 21%|██        | 7/33 [00:09<00:39,  1.51s/it][A
 24%|██▍       | 8/33 [00:11<00:38,  1.53s/it][A
 27%|██▋       | 9/33 [00:12<00:36,  1.54s/it][A
 30%|███       | 10/33 [00:14<00:35,  1.54s/it][A
 33%|███▎      | 11/33 [00:16<00:34,  1.55s/it][A
 36%|███▋      | 12/33 [00:17<00:32,  1.55s/it][A
 39%|███▉      | 13/33 [00:19<00:31,  1.56s/it][A
 42%|████▏     | 14/33 [00:20<00:29,  1.56s/it][A
 45%|████▌     | 15/33 [00:22<00:28,  1.56s/it][A
 48%|████▊     | 16/33 [00:23<00:26,  1.56s/it][A
 52%|█████▏    | 17/33 [00:25<00:24,  1.56s/it][A
 55%|█████▍    | 18/33 [00:26<00:23,  1.56s/it][A
 58%|█████▊    | 19/33 [00:28<00:21,  1.56s/it]

Validation at epoch 13 - AP: 0.8295, Acc: 0.8543
Early Stopping counter: 5 out of 5
Early stopping triggered
Continuing training with a learning rate reduced by a factor of 10

Epoch: 14
Train Loss: 0.2817 at step 4650 	 Iter time: 1.72
Train Loss: 0.3100 at step 4700 	 Iter time: 1.72
Train Loss: 0.3852 at step 4750 	 Iter time: 1.71
Train Loss: 0.3974 at step 4800 	 Iter time: 1.71
Train Loss: 0.4004 at step 4850 	 Iter time: 1.71
Train Loss: 0.5061 at step 4900 	 Iter time: 1.71
Average Train Loss: 0.0001
Validation:
Length of dataset:  2100



  0%|          | 0/33 [00:00<?, ?it/s][A
  3%|▎         | 1/33 [00:00<00:16,  1.98it/s][A
  6%|▌         | 2/33 [00:01<00:33,  1.09s/it][A
  9%|▉         | 3/33 [00:03<00:38,  1.28s/it][A
 12%|█▏        | 4/33 [00:05<00:39,  1.38s/it][A
 15%|█▌        | 5/33 [00:06<00:40,  1.43s/it][A
 18%|█▊        | 6/33 [00:08<00:39,  1.46s/it][A
 21%|██        | 7/33 [00:09<00:38,  1.48s/it][A
 24%|██▍       | 8/33 [00:11<00:37,  1.51s/it][A
 27%|██▋       | 9/33 [00:12<00:36,  1.52s/it][A
 30%|███       | 10/33 [00:14<00:35,  1.53s/it][A
 33%|███▎      | 11/33 [00:15<00:33,  1.54s/it][A
 36%|███▋      | 12/33 [00:17<00:32,  1.55s/it][A
 39%|███▉      | 13/33 [00:18<00:31,  1.55s/it][A
 42%|████▏     | 14/33 [00:20<00:29,  1.55s/it][A
 45%|████▌     | 15/33 [00:22<00:27,  1.56s/it][A
 48%|████▊     | 16/33 [00:23<00:26,  1.56s/it][A
 52%|█████▏    | 17/33 [00:25<00:24,  1.56s/it][A
 55%|█████▍    | 18/33 [00:26<00:23,  1.56s/it][A
 58%|█████▊    | 19/33 [00:28<00:21,  1.56s/it]

Validation at epoch 14 - AP: 0.8293, Acc: 0.8586

Epoch: 15
Train Loss: 0.3823 at step 4950 	 Iter time: 1.72
Train Loss: 0.2806 at step 5000 	 Iter time: 1.72
Train Loss: 0.3716 at step 5050 	 Iter time: 1.71
Train Loss: 0.3928 at step 5100 	 Iter time: 1.71
Train Loss: 0.3230 at step 5150 	 Iter time: 1.71
Train Loss: 0.2760 at step 5200 	 Iter time: 1.71
Train Loss: 0.3165 at step 5250 	 Iter time: 1.71
Average Train Loss: 0.0001
Validation:
Length of dataset:  2100



  0%|          | 0/33 [00:00<?, ?it/s][A
  3%|▎         | 1/33 [00:00<00:17,  1.85it/s][A
  6%|▌         | 2/33 [00:02<00:34,  1.11s/it][A
  9%|▉         | 3/33 [00:03<00:39,  1.31s/it][A
 12%|█▏        | 4/33 [00:05<00:40,  1.41s/it][A
 15%|█▌        | 5/33 [00:06<00:41,  1.47s/it][A
 18%|█▊        | 6/33 [00:08<00:40,  1.50s/it][A
 21%|██        | 7/33 [00:09<00:39,  1.52s/it][A
 24%|██▍       | 8/33 [00:11<00:38,  1.53s/it][A
 27%|██▋       | 9/33 [00:12<00:37,  1.54s/it][A
 30%|███       | 10/33 [00:14<00:35,  1.55s/it][A
 33%|███▎      | 11/33 [00:16<00:34,  1.55s/it][A
 36%|███▋      | 12/33 [00:17<00:32,  1.56s/it][A
 39%|███▉      | 13/33 [00:19<00:31,  1.56s/it][A
 42%|████▏     | 14/33 [00:20<00:29,  1.56s/it][A
 45%|████▌     | 15/33 [00:22<00:28,  1.56s/it][A
 48%|████▊     | 16/33 [00:23<00:26,  1.56s/it][A
 52%|█████▏    | 17/33 [00:25<00:24,  1.56s/it][A
 55%|█████▍    | 18/33 [00:27<00:23,  1.56s/it][A
 58%|█████▊    | 19/33 [00:28<00:21,  1.56s/it]

Validation at epoch 15 - AP: 0.8289, Acc: 0.8562
Early Stopping counter: 1 out of 5

Epoch: 16
Train Loss: 0.3071 at step 5300 	 Iter time: 1.72
Train Loss: 0.2683 at step 5350 	 Iter time: 1.72
Train Loss: 0.3011 at step 5400 	 Iter time: 1.71
Train Loss: 0.3210 at step 5450 	 Iter time: 1.71
Train Loss: 0.3459 at step 5500 	 Iter time: 1.71
Train Loss: 0.2522 at step 5550 	 Iter time: 1.71
Average Train Loss: 0.0001
Validation:
Length of dataset:  2100



  0%|          | 0/33 [00:00<?, ?it/s][A
  3%|▎         | 1/33 [00:00<00:16,  1.90it/s][A
  6%|▌         | 2/33 [00:02<00:33,  1.09s/it][A
  9%|▉         | 3/33 [00:03<00:38,  1.29s/it][A
 12%|█▏        | 4/33 [00:05<00:40,  1.38s/it][A
 15%|█▌        | 5/33 [00:06<00:40,  1.43s/it][A
 18%|█▊        | 6/33 [00:08<00:39,  1.46s/it][A
 21%|██        | 7/33 [00:09<00:38,  1.48s/it][A
 24%|██▍       | 8/33 [00:11<00:37,  1.50s/it][A
 27%|██▋       | 9/33 [00:12<00:36,  1.51s/it][A
 30%|███       | 10/33 [00:14<00:34,  1.51s/it][A
 33%|███▎      | 11/33 [00:15<00:33,  1.51s/it][A
 36%|███▋      | 12/33 [00:17<00:31,  1.52s/it][A
 39%|███▉      | 13/33 [00:18<00:30,  1.52s/it][A
 42%|████▏     | 14/33 [00:20<00:28,  1.52s/it][A
 45%|████▌     | 15/33 [00:21<00:27,  1.52s/it][A
 48%|████▊     | 16/33 [00:23<00:26,  1.53s/it][A
 52%|█████▏    | 17/33 [00:24<00:24,  1.54s/it][A
 55%|█████▍    | 18/33 [00:26<00:23,  1.54s/it][A
 58%|█████▊    | 19/33 [00:28<00:21,  1.55s/it]

Validation at epoch 16 - AP: 0.8286, Acc: 0.8586
Early Stopping counter: 2 out of 5

Epoch: 17
Train Loss: 0.3848 at step 5600 	 Iter time: 1.72
Train Loss: 0.4483 at step 5650 	 Iter time: 1.72
Train Loss: 0.3413 at step 5700 	 Iter time: 1.71
Train Loss: 0.4626 at step 5750 	 Iter time: 1.71
Train Loss: 0.3233 at step 5800 	 Iter time: 1.71
Train Loss: 0.3521 at step 5850 	 Iter time: 1.71
Train Loss: 0.3867 at step 5900 	 Iter time: 1.71
Average Train Loss: 0.0001
Validation:
Length of dataset:  2100



  0%|          | 0/33 [00:00<?, ?it/s][A
  3%|▎         | 1/33 [00:00<00:12,  2.63it/s][A
  6%|▌         | 2/33 [00:01<00:32,  1.05s/it][A
  9%|▉         | 3/33 [00:03<00:38,  1.28s/it][A
 12%|█▏        | 4/33 [00:05<00:40,  1.39s/it][A
 15%|█▌        | 5/33 [00:06<00:40,  1.45s/it][A
 18%|█▊        | 6/33 [00:08<00:40,  1.49s/it][A
 21%|██        | 7/33 [00:09<00:39,  1.51s/it][A
 24%|██▍       | 8/33 [00:11<00:38,  1.53s/it][A
 27%|██▋       | 9/33 [00:12<00:36,  1.54s/it][A
 30%|███       | 10/33 [00:14<00:35,  1.55s/it][A
 33%|███▎      | 11/33 [00:15<00:34,  1.55s/it][A
 36%|███▋      | 12/33 [00:17<00:32,  1.56s/it][A
 39%|███▉      | 13/33 [00:19<00:31,  1.56s/it][A
 42%|████▏     | 14/33 [00:20<00:29,  1.56s/it][A
 45%|████▌     | 15/33 [00:22<00:28,  1.56s/it][A
 48%|████▊     | 16/33 [00:23<00:26,  1.56s/it][A
 52%|█████▏    | 17/33 [00:25<00:24,  1.56s/it][A
 55%|█████▍    | 18/33 [00:26<00:23,  1.56s/it][A
 58%|█████▊    | 19/33 [00:28<00:21,  1.56s/it]

Validation at epoch 17 - AP: 0.8290, Acc: 0.8586
Early Stopping counter: 3 out of 5

Epoch: 18
Train Loss: 0.5058 at step 5950 	 Iter time: 1.72
Train Loss: 0.3883 at step 6000 	 Iter time: 1.72
Train Loss: 0.4874 at step 6050 	 Iter time: 1.71
Train Loss: 0.3869 at step 6100 	 Iter time: 1.71
Train Loss: 0.2722 at step 6150 	 Iter time: 1.71
Train Loss: 0.3552 at step 6200 	 Iter time: 1.71
Train Loss: 0.1837 at step 6250 	 Iter time: 1.71
Average Train Loss: 0.0001
Validation:
Length of dataset:  2100



  0%|          | 0/33 [00:00<?, ?it/s][A
  3%|▎         | 1/33 [00:00<00:16,  1.91it/s][A
  6%|▌         | 2/33 [00:02<00:33,  1.09s/it][A
  9%|▉         | 3/33 [00:03<00:38,  1.29s/it][A
 12%|█▏        | 4/33 [00:05<00:40,  1.38s/it][A
 15%|█▌        | 5/33 [00:06<00:40,  1.43s/it][A
 18%|█▊        | 6/33 [00:08<00:39,  1.46s/it][A
 21%|██        | 7/33 [00:09<00:38,  1.48s/it][A
 24%|██▍       | 8/33 [00:11<00:37,  1.49s/it][A
 27%|██▋       | 9/33 [00:12<00:36,  1.50s/it][A
 30%|███       | 10/33 [00:14<00:34,  1.51s/it][A
 33%|███▎      | 11/33 [00:15<00:33,  1.51s/it][A
 36%|███▋      | 12/33 [00:17<00:31,  1.52s/it][A
 39%|███▉      | 13/33 [00:18<00:30,  1.52s/it][A
 42%|████▏     | 14/33 [00:20<00:28,  1.52s/it][A
 45%|████▌     | 15/33 [00:21<00:27,  1.52s/it][A
 48%|████▊     | 16/33 [00:23<00:26,  1.53s/it][A
 52%|█████▏    | 17/33 [00:24<00:24,  1.54s/it][A
 55%|█████▍    | 18/33 [00:26<00:23,  1.55s/it][A
 58%|█████▊    | 19/33 [00:28<00:21,  1.55s/it]

Validation at epoch 18 - AP: 0.8290, Acc: 0.8576
Early Stopping counter: 4 out of 5

Epoch: 19


 63%|██████▎   | 19/30 [2:59:39<1:44:00, 567.36s/it]


KeyboardInterrupt: 

In [None]:
import re

from google.colab import files

# Directory that is scanned for saved checkpoints (*.pth files).
model_dir = '/content/experiments/training_vit_classification/models/'
model_files = sorted(
    [f for f in os.listdir(model_dir) if f.endswith('.pth')]
)
if not model_files:
    # Fail with an actionable message instead of an opaque IndexError further down.
    raise FileNotFoundError(f'No .pth checkpoints found in {model_dir}')


def _checkpoint_rank(filename):
    """Build a sort key that orders checkpoint filenames by their embedded AP.

    The key is ``(ap, filename)`` where ``ap`` is the number following ``ap_``
    in the filename (e.g. ``..._ap_0.8310.pth`` -> ``0.8310``). Filenames with
    no ``ap_<number>`` part get ``-inf`` so they rank below every scored
    checkpoint; ties (and the all-unscored case) fall back to name order, which
    matches the previous "lexicographically last" behaviour.
    """
    match = re.search(r'ap_(\d+(?:\.\d+)?)', filename)
    ap = float(match.group(1)) if match else float('-inf')
    return (ap, filename)


# Pick the checkpoint with the numerically highest AP rather than whichever
# filename happens to sort last alphabetically.
best_model = max(model_files, key=_checkpoint_rank)
best_model_path = os.path.join(model_dir, best_model)

# Trigger a browser download of the selected checkpoint.
files.download(best_model_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Copy the selected checkpoint into the Drive folder, prefixing its file name
# with "classification_". The call stays the cell's final expression so the
# destination path returned by shutil.copy is shown as the cell output.
shutil.copy(
    src=best_model_path,
    dst=f'/content/drive/MyDrive/Colab Notebooks/Licenta/DeCLIP-custom/trained_models/detection/classification_{os.path.basename(best_model_path)}',
)

'/content/drive/MyDrive/Colab Notebooks/Licenta/DeCLIP-custom/trained_models/detection/classification_best_classification_model_ap_0.8310.pth'