In [None]:
# Attach Google Drive to this Colab runtime.
# Later cells read the project sources and the trained checkpoints from paths under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Add the DeCLIP-custom project directory on Drive to the module search path
# so that the project-local imports in the next cells can be resolved.
import sys

PROJECT_ROOT = '/content/drive/MyDrive/Colab Notebooks/Licenta/DeCLIP-custom'
# Re-running this cell must not pile up duplicate entries in sys.path.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [None]:
!pip install torchmetrics



In [None]:
import torch
import numpy as np
import random
import os
import shutil
import copy

from parameters import Parameters
from deepfake_datasets import LOCALIZATION_DATASET_PATHS, DETECTION_DATASET_PATHS
from deepfake_datasets.datasets import get_dataloader
from models.voting_ensemble import VotingEnsembleModel
from train.validate import validate_detection, validate_fully_supervised_localization, validate_ensemble_fully_supervised_localization
from utils.utils import compute_mean_iou, compute_mean_f1, compute_mean_ap, compute_mean_acc_detection, compute_mean_ap_detection

In [None]:
# set seed for reproducibility
SEED = 0
def set_seed(seed=None):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: Seed value to apply. When omitted, the module-level ``SEED`` is
            used, so existing ``set_seed()`` calls behave as before.
    """
    if seed is None:
        seed = SEED
    torch.manual_seed(seed)
    # manual_seed_all seeds every visible GPU rather than only the current
    # device, and is silently ignored when CUDA is not available.
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)

# save image predictions to files
def save_image_localization_scores_to_file(ious, f1_best, f1_fixed, aps, img_paths, params, file_name='scores.txt'):
    """Write one tab-separated row of localization scores per image.

    The target file is opened in write mode, so it is created or truncated on
    every call: anything previously stored under the same name inside
    ``params.save_dir_results`` is lost. Pass a distinct ``file_name`` (or a
    ``params`` object pointing at another directory) to keep several result
    sets side by side.

    Args:
        ious: Per-image IoU scores.
        f1_best: Per-image F1 scores written to the ``f1_best`` column.
        f1_fixed: Per-image F1 scores written to the ``f1_fixed`` column.
        aps: Per-image AP scores.
        img_paths: Image paths, aligned with the score sequences.
        params: Object exposing ``save_dir_results``, the output directory.
        file_name: Name of the file inside ``params.save_dir_results``.
            Defaults to ``'scores.txt'``, the name that used to be hard-coded.

    Note:
        Rows are produced with ``zip``, so output stops at the shortest input.
    """
    out_path = os.path.join(params.save_dir_results, file_name)
    with open(out_path, 'w') as f:
        f.write('image path \t iou \t f1_best \t f1_fixed \t ap\n')
        for iou, f1_b, f1_f, ap, img_path in zip(ious, f1_best, f1_fixed, aps, img_paths):
            f.write(f'{img_path} \t {iou} \t {f1_b} \t {f1_f} \t {ap}\n')

# Normalisation constants for image processing, keyed by preprocessing scheme.
# Each entry holds three values (presumably one per RGB channel).
MEAN = dict(
    imagenet=[0.485, 0.456, 0.406],
    clip=[0.48145466, 0.4578275, 0.40821073],
)
STD = dict(
    imagenet=[0.229, 0.224, 0.225],
    clip=[0.26862954, 0.26130258, 0.27577711],
)

In [None]:
# Base configuration shared by every ensemble member: a later cell deep-copies
# `params` once per model and overrides the model-specific fields on each copy.
params = Parameters()

# Set the experiment name, then create the output directories for it.
# The assert only guards against an empty name.
# NOTE(review): the original note says this step should also alert when the output
# directory already exists — presumably create_output_dirs() does that; confirm.
params.experiment_name = 'test_ensemble'
assert params.experiment_name != '', 'Please set the experiment name'
params.create_output_dirs()

# Data split label used for testing.
params.data_label = 'test'

# Override the Dolos data paths with a new root directory / dataset name.
# These lines are currently ACTIVE; comment them out to skip the override.
new_root_path = '/content/datasets/dolos_data/celebahq/'
new_dataset_name = 'ldm'
params.update_dolos_data_paths(new_root_path, new_dataset_name)

# Base checkpoint path. Each ensemble member replaces it with its own checkpoint
# on its copy of `params`, so this value only needs to be non-empty here.
params.checkpoint_path = '/content/drive/MyDrive/Colab Notebooks/Licenta/DeCLIP-custom/trained_models/best_localization_model_iou_47.47_conv12_26_epochs.pth'
assert params.checkpoint_path != '', 'Please set the checkpoint path'

# Single-model variant, kept for reference: reads decoder_type / feature_layer
# from the base checkpoint. In the ensemble setup these two fields are taken
# from each member's own checkpoint instead.
# state_dict = torch.load(params.checkpoint_path, map_location='cpu')
# params.decoder_type = state_dict['decoder_type']
# params.feature_layer = state_dict['feature_layer']

# Data loading settings: batch size and number of loader threads.
params.batch_size = 64
params.num_threads = 8

In [None]:
# Directory on Drive that holds the trained checkpoints of the ensemble members.
TRAINED_MODELS_DIR = '/content/drive/MyDrive/Colab Notebooks/Licenta/DeCLIP-custom/trained_models'


def _load_member_config(base_params, arch, checkpoint_name):
    """Build the parameters and load the checkpoint of one ensemble member.

    Args:
        base_params: Parameters shared by all members. It is deep-copied and
            never modified.
        arch: Value stored in the ``arch`` attribute of the copy.
        checkpoint_name: File name of the checkpoint inside ``TRAINED_MODELS_DIR``.

    Returns:
        A ``(member_params, state_dict)`` tuple. ``member_params`` has ``arch``,
        ``checkpoint_path``, ``decoder_type`` and ``feature_layer`` set; the last
        two are read from the loaded checkpoint.

    Raises:
        KeyError: If the checkpoint has no ``'decoder_type'`` or ``'feature_layer'`` entry.
    """
    member_params = copy.deepcopy(base_params)
    member_params.arch = arch
    member_params.checkpoint_path = f'{TRAINED_MODELS_DIR}/{checkpoint_name}'
    # map_location='cpu' remaps stored tensors to the CPU while loading.
    # NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
    state_dict = torch.load(member_params.checkpoint_path, map_location='cpu')
    member_params.decoder_type = state_dict['decoder_type']
    member_params.feature_layer = state_dict['feature_layer']
    return member_params, state_dict


params_vit, state_dict_vit = _load_member_config(
    params, 'CLIP:ViT-L/14', 'best_localization_model_vit_iou_47.47_conv12_26_epochs.pth'
)

params_rn50, state_dict_rn50 = _load_member_config(
    params, 'CLIP:RN50', 'best_localization_model_rn50_iou_45.57_conv12_45_epochs.pth'
)

params_vit_rn50, state_dict_vit_rn50 = _load_member_config(
    params, 'CLIP:ViT-L/14,RN50', 'best_localization_model_vit+rn50_iou_46_conv4_19_epochs.pth'
)

# Both lists are index-aligned: entry i of each belongs to the same model.
models_params = [params_vit, params_rn50, params_vit_rn50]
state_dicts = [state_dict_vit, state_dict_rn50, state_dict_vit_rn50]

In [None]:
# Build one ensemble per voting strategy from the same member parameters and checkpoints.
ensemble_inputs = (models_params, state_dicts)
hard_voting_ensemble = VotingEnsembleModel(*ensemble_inputs, voting_method='hard')
soft_voting_ensemble = VotingEnsembleModel(*ensemble_inputs, voting_method='soft')

In [None]:
# prepare the datasets and the results file
# Each supported task selects its dataset list and the header row of the summary file.
if params.task_type == 'fully_supervised_localization':
    dataset_paths = LOCALIZATION_DATASET_PATHS
    scores_header = 'dataset \t iou \t f1_best \t f1_fixed \t ap \n'
elif params.task_type == 'detection':
    dataset_paths = DETECTION_DATASET_PATHS
    scores_header = 'dataset \t ap \t acc_fixed_thresh \t acc_best_thresh \t best_threshold \n'
else:
    # Fail here with a clear message instead of a NameError on `dataset_paths`
    # when the test loop starts.
    raise ValueError(
        f"Unsupported task type {params.task_type!r}; "
        "expected 'fully_supervised_localization' or 'detection'"
    )

# Append mode: an existing summary file is extended, not replaced.
with open(os.path.join(params.save_dir_results, 'scores.txt'), 'a') as f:
    f.write(scores_header)

In [None]:
# start the testing process
# Root directory on Drive that the dataset-relative paths are resolved against.
DATASETS_ROOT = '/content/drive/MyDrive/Colab Notebooks/Licenta/DeCLIP-main/'
# Summary file: one row of mean scores per (dataset, ensemble) evaluation.
summary_scores_path = os.path.join(params.save_dir_results, 'scores.txt')
# Evaluation order; the summary rows of each dataset follow the same order (hard, then soft).
voting_ensembles = [('hard', hard_voting_ensemble), ('soft', soft_voting_ensemble)]

for dataset_path in dataset_paths:
    dataset_key = dataset_path['key']
    print(f"Testing on {dataset_key}")
    set_seed()

    # Directory that collects the results of this dataset.
    # (The original also called makedirs on join(save_dir_results, params.train_dataset);
    # train_dataset is absolute, so os.path.join dropped the results directory and a
    # stray folder was created under DATASETS_ROOT. That call is removed.)
    dataset_results_dir = os.path.join(params.save_dir_results, dataset_key)
    os.makedirs(dataset_results_dir, exist_ok=True)

    params.train_dataset = os.path.join(DATASETS_ROOT, dataset_key)
    params.test_fake_path = os.path.join(DATASETS_ROOT, dataset_path['fake_path'])
    params.test_masks_ground_truth_path = os.path.join(DATASETS_ROOT, dataset_path['masks_path'])
    if params.task_type == 'detection':
        params.test_real_path = os.path.join(DATASETS_ROOT, dataset_path['real_path'])

    data_loader = get_dataloader(params)

    if params.task_type == 'fully_supervised_localization':
        for voting_method, ensemble in voting_ensembles:
            print(f'Testing {voting_method} voting ensemble')
            ious, f1_best, f1_fixed, ap, original_img_paths = validate_ensemble_fully_supervised_localization(
                ensemble, data_loader, dataset_key, params.save_dir_results
            )

            # Per-image scores get their own directory per dataset and voting method.
            # The helper truncates <save_dir_results>/scores.txt, so handing it the
            # shared `params` would wipe the summary file and let the soft run
            # overwrite the hard run's per-image scores.
            image_scores_params = copy.copy(params)
            image_scores_params.save_dir_results = os.path.join(dataset_results_dir, f'{voting_method}_voting')
            os.makedirs(image_scores_params.save_dir_results, exist_ok=True)
            save_image_localization_scores_to_file(ious, f1_best, f1_fixed, ap, original_img_paths, image_scores_params)

            mean_iou = compute_mean_iou(ious, verbose=True, extra_text=dataset_key)
            mean_f1_best, mean_f1_fixed = compute_mean_f1(f1_best, f1_fixed, verbose=True, extra_text=dataset_key)
            mean_ap = compute_mean_ap(ap, verbose=True, extra_text=dataset_key)

            with open(summary_scores_path, 'a') as f:
                f.write(f"{dataset_key} \t {mean_iou:.4f} \t {mean_f1_best:.4f} \t {mean_f1_fixed:.4f} \t {mean_ap:.4f} \n")

    print()

Testing on lama
Testing hard voting ensemble
Length of dataset:  900


  0%|          | 0/15 [00:00<?, ?it/s]

Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1


 13%|█▎        | 2/15 [00:20<02:06,  9.70s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1


 40%|████      | 6/15 [00:48<01:06,  7.41s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1


 47%|████▋     | 7/15 [00:55<00:58,  7.28s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1


 53%|█████▎    | 8/15 [01:02<00:50,  7.20s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1


 67%|██████▋   | 10/15 [01:16<00:35,  7.12s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1


 73%|███████▎  | 11/15 [01:23<00:28,  7.12s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1


 80%|████████  | 12/15 [01:31<00:21,  7.09s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1


100%|██████████| 15/15 [01:45<00:00,  7.04s/it]


lamaMean IOU: 40.92
lamaMean F1 best: 0.4968
lamaMean F1 fixed: 0.4968
lamaMean AP: 0.4719
Testing soft voting ensemble
Length of dataset:  900


100%|██████████| 15/15 [01:51<00:00,  7.44s/it]


lamaMean IOU: 39.65
lamaMean F1 best: 0.707
lamaMean F1 fixed: 0.5003
lamaMean AP: 0.679

Testing on ldm
Testing hard voting ensemble
Length of dataset:  900


  0%|          | 0/15 [00:00<?, ?it/s]

Empty predictions or ground truth, returning empty tensors in localization_f1


  7%|▋         | 1/15 [00:12<02:59, 12.85s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1


 20%|██        | 3/15 [00:27<01:40,  8.39s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1


 27%|██▋       | 4/15 [00:34<01:26,  7.87s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1


 33%|███▎      | 5/15 [00:41<01:15,  7.59s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1


 40%|████      | 6/15 [00:48<01:06,  7.43s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1


 47%|████▋     | 7/15 [00:55<00:58,  7.30s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1


 53%|█████▎    | 8/15 [01:02<00:50,  7.25s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1


 60%|██████    | 9/15 [01:09<00:43,  7.20s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1


 67%|██████▋   | 10/15 [01:16<00:35,  7.19s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1


 73%|███████▎  | 11/15 [01:23<00:28,  7.15s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1


 80%|████████  | 12/15 [01:30<00:21,  7.14s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1


 87%|████████▋ | 13/15 [01:38<00:14,  7.12s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1


100%|██████████| 15/15 [01:45<00:00,  7.03s/it]


ldmMean IOU: 49.12
ldmMean F1 best: 0.5915
ldmMean F1 fixed: 0.5915
ldmMean AP: 0.5187
Testing soft voting ensemble
Length of dataset:  900


100%|██████████| 15/15 [01:51<00:00,  7.43s/it]


ldmMean IOU: 48.35
ldmMean F1 best: 0.7389
ldmMean F1 fixed: 0.5991
ldmMean AP: 0.7151

Testing on pluralistic
Testing hard voting ensemble
Length of dataset:  900


 20%|██        | 3/15 [00:27<01:41,  8.47s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1


 33%|███▎      | 5/15 [00:41<01:16,  7.65s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1


 73%|███████▎  | 11/15 [01:24<00:28,  7.14s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1


 80%|████████  | 12/15 [01:31<00:21,  7.12s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1


100%|██████████| 15/15 [01:45<00:00,  7.06s/it]


pluralisticMean IOU: 52.96
pluralisticMean F1 best: 0.6086
pluralisticMean F1 fixed: 0.6086
pluralisticMean AP: 0.5548
Testing soft voting ensemble
Length of dataset:  900


100%|██████████| 15/15 [01:51<00:00,  7.47s/it]


pluralisticMean IOU: 52.59
pluralisticMean F1 best: 0.7626
pluralisticMean F1 fixed: 0.6122
pluralisticMean AP: 0.7474

Testing on repaint-p2-9k
Testing hard voting ensemble
Length of dataset:  900


  0%|          | 0/15 [00:00<?, ?it/s]

Empty predictions or ground truth, returning empty tensors in localization_f1


  7%|▋         | 1/15 [00:13<03:08, 13.47s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1


 13%|█▎        | 2/15 [00:20<02:07,  9.78s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1


 20%|██        | 3/15 [00:27<01:42,  8.53s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1


 47%|████▋     | 7/15 [00:56<00:58,  7.37s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1


 60%|██████    | 9/15 [01:10<00:43,  7.25s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1


 67%|██████▋   | 10/15 [01:17<00:35,  7.18s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1


 73%|███████▎  | 11/15 [01:24<00:28,  7.14s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1


 80%|████████  | 12/15 [01:31<00:21,  7.12s/it]

Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1
Empty predictions or ground truth, returning empty tensors in localization_f1


100%|██████████| 15/15 [01:46<00:00,  7.09s/it]


repaint-p2-9kMean IOU: 43.77
repaint-p2-9kMean F1 best: 0.5281
repaint-p2-9kMean F1 fixed: 0.5281
repaint-p2-9kMean AP: 0.4824
Testing soft voting ensemble
Length of dataset:  900


100%|██████████| 15/15 [01:52<00:00,  7.47s/it]


repaint-p2-9kMean IOU: 43.11
repaint-p2-9kMean F1 best: 0.6904
repaint-p2-9kMean F1 fixed: 0.5305
repaint-p2-9kMean AP: 0.6762

Testing on autosplice_jpeg75
Testing hard voting ensemble
Length of dataset:  3621


  0%|          | 0/57 [00:38<?, ?it/s]


KeyboardInterrupt: 

In [None]:
import shutil
from google.colab import files

# Zip the results folder of the experiment.
# NOTE(review): the folder is hard-coded; presumably it matches params.save_dir_results
# for the 'test_ensemble' experiment — confirm when the experiment name changes.
archive_base_name = 'results_voting_ensemble_own'
shutil.make_archive(archive_base_name, 'zip', '/content/experiments/test_ensemble/results')

# Download the archive
files.download(f'{archive_base_name}.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>