# Finding hyperparameters

In [1]:
import os
import torch
import numpy as np
import pickle
from tqdm.notebook import tqdm
from PIL import Image
import clip
from torchvision import transforms
from skimage.metrics import structural_similarity as ssim, peak_signal_noise_ratio as psnr
from perturbation_methods import fgsm_penalty, pgd_penalty, nightshade_penalty
from nightshade import Nightshade
from diffusers import StableDiffusionPipeline

%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Load the CLIP ViT-B/32 model together with its image preprocessing callable,
# placed on DEVICE. Both are used by compute_clip_similarity further down.
CLIP_MODEL, CLIP_PREPROCESS = clip.load("ViT-B/32", device=DEVICE)


KeyboardInterrupt: 

In [None]:
def load_subset(path, max_samples=10):
    """Load the first ``max_samples`` pickled objects found in ``path``.

    Only files whose name ends in ".p" are considered. They are taken in
    sorted filename order and unpickled one by one.

    Args:
        path: Directory containing the pickle files.
        max_samples: Upper bound on the number of files to load.

    Returns:
        A list with one unpickled object per loaded file, in filename order.

    Note:
        ``pickle.load`` can execute arbitrary code; only point this at
        directories whose contents you trust.
    """
    pickle_names = sorted(name for name in os.listdir(path) if name.endswith(".p"))
    loaded = []
    for name in pickle_names[:max_samples]:
        with open(os.path.join(path, name), 'rb') as handle:
            loaded.append(pickle.load(handle))
    return loaded

# Load at most 20 pickled candidates from the local dataset directory.
# Later cells read each entry's 'img' field and turn it into a PIL image.
images = load_subset('poisoning_candidates/pickle', max_samples=20)
print(f"Loaded {len(images)} images from the dataset.")


Loaded 20 images from the dataset.


In [None]:
def compute_clip_similarity(image, text):
    """Return the CLIP cosine similarity between one image and one text prompt.

    The image is run through CLIP_PREPROCESS and the text through
    ``clip.tokenize``; both are encoded by CLIP_MODEL without gradient
    tracking, L2-normalised along the last dimension, and compared with a
    dot product.

    Args:
        image: Image accepted by CLIP_PREPROCESS (the notebook passes PIL images).
        text: Prompt string to compare the image against.

    Returns:
        The similarity as a Python float.
    """
    pixels = CLIP_PREPROCESS(image).unsqueeze(0).to(DEVICE)
    tokens = clip.tokenize([text]).to(DEVICE)

    with torch.no_grad():
        img_emb = CLIP_MODEL.encode_image(pixels)
        txt_emb = CLIP_MODEL.encode_text(tokens)

    # Unit-normalise so the matrix product below is a cosine similarity.
    img_emb = img_emb / img_emb.norm(dim=-1, keepdim=True)
    txt_emb = txt_emb / txt_emb.norm(dim=-1, keepdim=True)

    similarity = img_emb @ txt_emb.T
    return similarity.item()

def evaluate_attack(original_img, poisoned_img, target_text, original_text=""):
    """Score a poisoned image against its source image and the target prompt.

    Pixel-level metrics (L2, SSIM, PSNR) are computed on 224x224 resized copies
    of both images. The CLIP scores are computed on ``poisoned_img`` as passed in.

    Args:
        original_img: Unperturbed image; must support ``.resize((w, h))`` and
            ``np.array(...)`` (the notebook passes PIL images).
        poisoned_img: Perturbed image with the same interface
            (presumably a PIL image returned by ``ns.generate`` — TODO confirm).
        target_text: Prompt describing the concept the attack steers towards.
        original_text: Prompt describing the image's original concept, used for
            the ``clip_original`` score. Defaults to the empty string, which
            reproduces the previous behaviour; pass the real source concept to
            get a meaningful baseline, since similarity to "" says little about
            how much of the original concept survives.

    Returns:
        dict with keys ``clip_target``, ``clip_original``, ``l2``, ``ssim``
        and ``psnr``.
    """
    # Convert to arrays at a common resolution so the metrics are comparable.
    orig_arr = np.array(original_img.resize((224, 224)))
    poison_arr = np.array(poisoned_img.resize((224, 224)))

    # CLIP alignment of the poisoned image with the target and original prompts.
    clip_score_target = compute_clip_similarity(poisoned_img, target_text)
    clip_score_original = compute_clip_similarity(poisoned_img, original_text)

    # Cast to float before subtracting so uint8 differences cannot wrap around.
    l2_dist = np.linalg.norm(poison_arr.astype(np.float32) - orig_arr.astype(np.float32))
    ssim_val = ssim(orig_arr, poison_arr, channel_axis=-1, data_range=255)
    psnr_val = psnr(orig_arr, poison_arr, data_range=255)

    return {
        "clip_target": clip_score_target,
        "clip_original": clip_score_original,
        "l2": l2_dist,
        "ssim": ssim_val,
        "psnr": psnr_val
    }


In [None]:
# Search grids for the three penalty methods.
# All three methods currently sweep the same epsilon values; each grid is its
# own list object so one can be edited without affecting the others.
fgsm_eps = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5]

pgd_eps = list(fgsm_eps)
pgd_step_size = [0.01, 0.05, 0.1, 0.2]
pgd_iterations = [5, 10, 20, 50, 100]

nightshade_eps = list(fgsm_eps)
# Nightshade sweeps the PGD iteration counts plus two longer runs.
nightshade_iterations = [*pgd_iterations, 150, 200]

# Stable Diffusion v1.5 pipeline handed to the Nightshade generator.
# Half precision is only requested on CUDA: DEVICE can fall back to "cpu",
# where fp16 weights are slow or unsupported for many ops, so use fp32 there.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
    safety_checker=None,
).to(DEVICE)
# Silence the per-call denoising progress bars through the public API rather
# than by writing to the private _progress_bar_config attribute.
pipe.set_progress_bar_config(disable=True)

# Concept the poisoned images are steered towards; reused by every sweep cell.
target_text = 'cat'

# Reuse target_text so the generator and the evaluation prompt cannot drift apart.
# NOTE(review): the meaning of the third positional argument (None) is not
# visible in this notebook — confirm against the Nightshade constructor.
ns = Nightshade(target_text, DEVICE, None, pipe)

Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


In [None]:
# FGSM
# Sweep every FGSM epsilon over every loaded image, then average each metric
# per epsilon and write the averages to fgsm_results.txt.
ns.penalty_method = fgsm_penalty  # same for the whole sweep, so set it once

results = []
for image_data in tqdm(images, desc="Images"):
    original_img = Image.fromarray(image_data['img'])
    # tqdm's keyword is `desc` (an unknown keyword such as `description` is
    # rejected), and the current epsilon is only known inside the loop, so it
    # is attached with set_postfix instead of being passed to the constructor.
    eps_bar = tqdm(fgsm_eps, desc="FGSM Epsilon", leave=False)
    for eps in eps_bar:
        eps_bar.set_postfix(eps=eps)
        ns.eps = eps
        poisoned_img = ns.generate(original_img, target_text)
        metrics = evaluate_attack(original_img, poisoned_img, target_text)
        results.append((eps, metrics))

# Group the per-image metrics by epsilon, then average each metric.
avg_results = {}
for eps, metrics in results:
    if eps not in avg_results:
        avg_results[eps] = {k: [] for k in metrics.keys()}
    for k, v in metrics.items():
        avg_results[eps][k].append(v)
avg_results = {
    eps: {k: np.mean(v) for k, v in metrics.items()}
    for eps, metrics in avg_results.items()
}

# Open the output file only after the sweep has finished, so an interrupted
# run does not leave a truncated or empty results file behind.
with open("fgsm_results.txt", "w") as f:
    for eps, metrics in avg_results.items():
        f.write(f"FGSM Epsilon: {eps}\n")
        for k, v in metrics.items():
            f.write(f"{k}: {v}\n")
        f.write("\n")



  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# PGD
# Grid-search epsilon x step size x iteration count over every loaded image,
# then average each metric per parameter combination and write the averages
# to pgd_results.txt.
ns.penalty_method = pgd_penalty  # same for the whole sweep, so set it once

results = []
for image_data in tqdm(images, desc="Images"):
    original_img = Image.fromarray(image_data['img'])
    # The loop variables do not exist yet when each tqdm bar is constructed,
    # so passing them as `postfix=` raises NameError on a fresh kernel (or
    # shows a stale value). Set the postfix inside each loop instead.
    eps_bar = tqdm(pgd_eps, desc="PGD Epsilon", leave=False)
    for eps in eps_bar:
        eps_bar.set_postfix(eps=eps)
        ns.eps = eps
        step_bar = tqdm(pgd_step_size, desc="PGD Step Size", leave=False)
        for step_size in step_bar:
            step_bar.set_postfix(step_size=step_size)
            ns.step_size = step_size
            iter_bar = tqdm(pgd_iterations, desc="PGD Iterations", leave=False)
            for iterations in iter_bar:
                iter_bar.set_postfix(iterations=iterations)
                ns.iterations = iterations
                poisoned_img = ns.generate(original_img, target_text)
                metrics = evaluate_attack(original_img, poisoned_img, target_text)
                results.append((eps, step_size, iterations, metrics))

# Group the per-image metrics by parameter combination, then average them.
avg_results = {}
for eps, step_size, iterations, metrics in results:
    key = (eps, step_size, iterations)
    if key not in avg_results:
        avg_results[key] = {k: [] for k in metrics.keys()}
    for k, v in metrics.items():
        avg_results[key][k].append(v)
avg_results = {
    key: {k: np.mean(v) for k, v in metrics.items()}
    for key, metrics in avg_results.items()
}

# Open the output file only after the sweep has finished, so an interrupted
# run does not leave a truncated or empty results file behind.
with open('pgd_results.txt', 'w') as f:
    for key, metrics in avg_results.items():
        f.write(f"PGD Epsilon: {key[0]}, Step Size: {key[1]}, Iterations: {key[2]}\n")
        for k, v in metrics.items():
            f.write(f"{k}: {v}\n")
        f.write("\n")

In [None]:
# Original Nightshade penalty
# Grid-search epsilon x iteration count over every loaded image, then average
# each metric per parameter combination and write the averages to
# nightshade_results.txt.
ns.penalty_method = nightshade_penalty  # same for the whole sweep, so set it once

results = []
for image_data in tqdm(images, desc="Images"):
    original_img = Image.fromarray(image_data['img'])
    # The loop variables do not exist yet when each tqdm bar is constructed,
    # so passing them as `postfix=` raises NameError on a fresh kernel (or
    # shows a stale value). Set the postfix inside each loop instead.
    eps_bar = tqdm(nightshade_eps, desc="Nightshade Epsilon", leave=False)
    for eps in eps_bar:
        eps_bar.set_postfix(eps=eps)
        ns.eps = eps
        iter_bar = tqdm(nightshade_iterations, desc="Nightshade Iterations", leave=False)
        for iterations in iter_bar:
            iter_bar.set_postfix(iterations=iterations)
            ns.iterations = iterations
            poisoned_img = ns.generate(original_img, target_text)
            metrics = evaluate_attack(original_img, poisoned_img, target_text)
            results.append((eps, iterations, metrics))

# Group the per-image metrics by parameter combination, then average them.
avg_results = {}
for eps, iterations, metrics in results:
    key = (eps, iterations)
    if key not in avg_results:
        avg_results[key] = {k: [] for k in metrics.keys()}
    for k, v in metrics.items():
        avg_results[key][k].append(v)
avg_results = {
    key: {k: np.mean(v) for k, v in metrics.items()}
    for key, metrics in avg_results.items()
}

# Open the output file only after the sweep has finished, so an interrupted
# run does not leave a truncated or empty results file behind.
with open('nightshade_results.txt', 'w') as f:
    for key, metrics in avg_results.items():
        f.write(f"Nightshade Epsilon: {key[0]}, Iterations: {key[1]}\n")
        for k, v in metrics.items():
            f.write(f"{k}: {v}\n")
        f.write("\n")