# Nightshade Poisoning

## Imports

In [1]:
from diffusers import StableDiffusionPipeline
import torch
from tqdm.notebook import tqdm
import glob
import pickle
import os
import numpy as np
from PIL import Image

# Custom Implementations
from py_files.data_process import get_dataset, get_poisoning_candidates, get_anchor_images
from py_files.nightshade import Nightshade
from py_files.perturbation_methods import fgsm_penalty, pgd_penalty, nightshade_penalty

%load_ext autoreload
%autoreload 2
%matplotlib inline

SD_MODEL_ID = 'runwayml/stable-diffusion-v1-5'
CONCEPT = 'dog'
TARGET_CONCEPT = 'cat'
NUM_TARGET_IMAGES = 300
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

shared_pipeline = StableDiffusionPipeline.from_pretrained(
    SD_MODEL_ID,
    safety_checker=None,
    torch_dtype=torch.float16,
).to(device)
shared_pipeline.set_progress_bar_config(disable=True)

%load_ext autoreload
%autoreload 2


Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off]




Loading model from: /home/zabibeau/Spring2025/MachineLearning2/nightshade-ml/nightshade_env/lib/python3.11/site-packages/lpips/weights/v0.1/vgg.pth


Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load COCO Dataset

In [None]:
# Build the dataset from the train2014 caption annotations and image directory.
# NOTE(review): the result is later passed as `df=`, so it is presumably a
# DataFrame — confirm against py_files.data_process.get_dataset.
coco_dataset = get_dataset(
    annotation_file='annotations/captions_train2014.json',
    data_dir='train2014',
)

## Get Poisoning Candidates

In [None]:
# Select the top NUM_TARGET_IMAGES poisoning candidates for CONCEPT.
poisoning_candidates = get_poisoning_candidates(
    df=coco_dataset,
    concept=CONCEPT,
    num_candidates=NUM_TARGET_IMAGES,
)

# Create NUM_TARGET_IMAGES anchor images of TARGET_CONCEPT with the shared
# pipeline; they are written under poisoning_candidates/anchor_images and used
# as targets during poisoning.
get_anchor_images(
    pipeline=shared_pipeline,
    target_concept=TARGET_CONCEPT,
    num_images=NUM_TARGET_IMAGES,
    output_dir='poisoning_candidates/anchor_images',
)


## Poison Candidates

In [2]:
# Load the pickled poisoning candidates for CONCEPT.
# - The paths are sorted because glob.glob returns them in arbitrary order and
#   the loop below pairs candidate i with anchor image i; sorting keeps that
#   pairing reproducible across runs and machines.
# - NOTE(review): assumes the candidate pickles are named '<CONCEPT>_*.p' —
#   confirm against get_poisoning_candidates.
# - NOTE: pickle.load can execute arbitrary code; only load files that were
#   produced by this project.
candidate_files = sorted(glob.glob(f'poisoning_candidates/pickle/{CONCEPT}_*.p'))
candidates = []
for candidate_file in candidate_files:
    # Context manager guarantees the file handle is closed after loading.
    with open(candidate_file, 'rb') as candidate_fh:
        candidates.append(pickle.load(candidate_fh))

# Penalty methods to run, keyed by the name used for output folders and file
# prefixes. Uncomment an entry to also poison with that method.
methods = {
    # 'fgsm': fgsm_penalty,
    # 'pgd': pgd_penalty,
    'original': nightshade_penalty,
}

# `device` and `shared_pipeline` come from the configuration cell at the top.
nightshade = Nightshade(target_concept=None, device=device, sd_pipeline=shared_pipeline)
for name, method in methods.items():
    output_dir = f'poisoned_images/{name}'
    nightshade.penalty_method = method
    # makedirs creates output_dir itself as a parent; exist_ok makes re-runs safe.
    os.makedirs(f"{output_dir}/pickle", exist_ok=True)
    os.makedirs(f"{output_dir}/images", exist_ok=True)
    for i, candidate in tqdm(enumerate(candidates), desc=f'Poisoning with {name} method', total=len(candidates)):
        original_img = Image.fromarray(candidate['img'])
        # Candidate i is paired with the i-th generated anchor image.
        anchor_path = f'poisoning_candidates/anchor_images/anchor_{i:04d}.jpg'
        # Use the configured TARGET_CONCEPT so it always matches the concept
        # the anchor images were generated for.
        poisoned_img = nightshade.generate(original_img, target_concept=TARGET_CONCEPT, target_anchor_path=anchor_path)
        poisoned_data = {
            'img': np.array(poisoned_img),
            'text': candidate['text']
        }
        # Write the pickle through a context manager so the data is flushed and
        # the handle closed before moving on to the next candidate.
        with open(f"{output_dir}/pickle/{name}_{candidate['image_id']}.p", 'wb') as poisoned_fh:
            pickle.dump(poisoned_data, poisoned_fh)
        poisoned_img.save(f"{output_dir}/images/{name}_{candidate['image_id']}.jpg")
    print(f'Saved {len(candidates)} poisoned images for {name} method in {output_dir}')

Poisoning with original method:   0%|          | 0/300 [00:00<?, ?it/s]

W0509 17:47:55.873000 123453 torch/_inductor/utils.py:1137] [0/0] Not enough SMs to use max_autotune_gemm mode


[0/20]	Latent Loss: 66.4375	LPIPS: 0.0000
[19/20]	Latent Loss: 22.9062	LPIPS: 0.6029
[0/20]	Latent Loss: 50.0312	LPIPS: 0.0000
[19/20]	Latent Loss: 21.2812	LPIPS: 0.5521
[0/20]	Latent Loss: 54.8125	LPIPS: 0.0000
[19/20]	Latent Loss: 19.0156	LPIPS: 0.6936
[0/20]	Latent Loss: 50.4375	LPIPS: 0.0000
[19/20]	Latent Loss: 19.1562	LPIPS: 0.6391
[0/20]	Latent Loss: 57.0938	LPIPS: 0.0000
[19/20]	Latent Loss: 15.8750	LPIPS: 0.7551
[0/20]	Latent Loss: 43.8438	LPIPS: 0.0000
[19/20]	Latent Loss: 19.4219	LPIPS: 0.4198
[0/20]	Latent Loss: 38.8438	LPIPS: 0.0000
[19/20]	Latent Loss: 12.6562	LPIPS: 0.6255
[0/20]	Latent Loss: 43.6250	LPIPS: 0.0000
[19/20]	Latent Loss: 15.8828	LPIPS: 0.5501
[0/20]	Latent Loss: 42.3125	LPIPS: 0.0000
[19/20]	Latent Loss: 15.3594	LPIPS: 0.6607
[0/20]	Latent Loss: 38.9688	LPIPS: 0.0000
[19/20]	Latent Loss: 15.2500	LPIPS: 0.5040
[0/20]	Latent Loss: 43.8125	LPIPS: 0.0000
[19/20]	Latent Loss: 13.8203	LPIPS: 0.5754
[0/20]	Latent Loss: 36.2188	LPIPS: 0.0000
[19/20]	Latent Loss: 12