# Nightshade Penalty Method Validation and Analysis

## Imports

In [None]:
# Package imports
import torch
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from IPython.display import display
import json
import glob
import os
from tqdm.notebook import tqdm


# Our custom implementations
from nightshade import Nightshade
from perturbation_methods import fgsm_penalty, pgd_penalty, nightshade_penalty
from data_process import get_dataset, get_poisoning_candidates

%load_ext autoreload
%autoreload 2
%matplotlib inline

# Directory of training images; passed to get_dataset in the
# "Get poisoning candidates" cell (presumably the COCO 2014 train split — confirm).
TRAIN_DATA_DIR = 'train2014'
# Captions annotation JSON; passed to get_dataset alongside TRAIN_DATA_DIR.
ANNOTATION_FILENAME = 'annotations/captions_train2014.json'

## Configuration

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Concept passed to Nightshade as the poisoning target in the cells below.
target_concept = 'cat'
# NOTE(review): `epsilon` is not referenced in the visible cells; the Nightshade
# instance below is built with a hard-coded eps = 0.1 — confirm which is intended.
epsilon = 0.05
# NOTE(review): not referenced in the visible cells — presumably for ad-hoc tests.
test_image_path = 'test_images/dog.jpg'


## Load Dataset

In [None]:
def load_annotations(annotations_path):
    """Load a captions annotation JSON file and print a short summary.

    Args:
        annotations_path: Path to a JSON file whose top-level object has an
            ``"annotations"`` list.

    Returns:
        The parsed JSON document (the whole top-level object, unchanged).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the document has no ``"annotations"`` key.
    """
    # Context manager closes the handle even if parsing fails.
    with open(annotations_path, 'r', encoding='utf-8') as fh:
        annotation_file = json.load(fh)

    # Count the annotation records themselves; len() of the top-level dict
    # would only report how many keys the document has.
    annotations = annotation_file['annotations']
    print(f"Loaded {len(annotations)} annotations from {annotations_path}")
    if annotations:
        print(f"Example annotation: {annotations[0]}")
    return annotation_file

# Parse the captions file once, then report how many caption records and
# image records it contains.
file = load_annotations('annotations/captions_train2014.json')
print(f"Number of annotations: {len(file['annotations'])}")
print(f"Number of images: {len(file['images'])}")

## Get poisoning candidates

In [None]:
# Paths used by this cell. The two data paths repeat the values set in the
# imports cell so this cell can be re-run on its own.
OUTPUT_DIR = 'poisoning_candidates'
TRAIN_DATA_DIR = 'train2014'
ANNOTATION_FILENAME = 'annotations/captions_train2014.json'

# Build the working table from the annotations and image directory
# (limit=1000, unique_images=True).
df = get_dataset(
    ANNOTATION_FILENAME,
    TRAIN_DATA_DIR,
    limit=1000,
    unique_images=True,
)

# Select 5 "dog" samples from the table, writing to OUTPUT_DIR.
get_poisoning_candidates(df=df, concept="dog", num_candidates=5, output_dir=OUTPUT_DIR)

### Poisoning Candidate Validation

In [None]:
import pickle
import matplotlib.pyplot as plt

# Sanity check: unpickle one saved "dog" candidate and plot it with its caption.
# NOTE: pickle can execute arbitrary code on load — only open files that were
# produced by this notebook.
with open("poisoning_candidates/dog_0.p", mode="rb") as f:
    data = pickle.load(f)

# Show the stored image without axes, titled with its caption.
plt.imshow(data["img"])
plt.axis("off")
plt.title(f"Caption: {data['text']}")
plt.show()

## Nightshade Poisoning

In [None]:
import pickle  # repeated from the validation cell so this cell runs on its own

# Sort the matches: glob returns files in arbitrary order, and the index of
# each candidate is later reused to name its poisoned output file.
candidate_files = sorted(glob.glob("poisoning_candidates/dog_*.p"))
if not candidate_files:
    raise FileNotFoundError(
        "No candidate files match 'poisoning_candidates/dog_*.p'; "
        "run the 'Get poisoning candidates' cell first."
    )

# Load each candidate inside a `with` block so its file handle is closed.
# NOTE: pickle can execute arbitrary code on load — only read files that were
# produced by this notebook.
candidates = []
for candidate_path in candidate_files:
    with open(candidate_path, "rb") as candidate_fh:
        candidates.append(pickle.load(candidate_fh))

# Preview the first candidate at 256x256 together with its caption.
img = Image.fromarray(candidates[0]['img'])
display(img.resize((256, 256)))
print(f"Caption: {candidates[0]['text']}")

In [None]:
# Target concept and compute device for the poisoner (GPU when CUDA is
# available, CPU otherwise). Both repeat the Configuration cell's values.
target_concept = 'cat'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): eps is hard-coded to 0.1 here, while the Configuration cell
# defines epsilon = 0.05 and never uses it — confirm which value is intended.
nightshade = Nightshade(
    penalty_method=nightshade_penalty,
    eps=0.1,
    device=device,
    target_concept=target_concept,
)

In [None]:
import pickle  # repeated from the validation cell so this cell runs on its own

# Destination for the poisoned samples; created on first use.
output_dir = 'poisoned_images'
os.makedirs(output_dir, exist_ok=True)

# Poison every loaded candidate and save it as a pickle with its original caption.
for i, candidate in tqdm(enumerate(candidates), desc="Poisoning Images:", total=len(candidates)):
    img = Image.fromarray(candidate['img'])

    poisoned_img = nightshade.generate(img, target_concept)

    poisoned_data = {
        "img": np.array(poisoned_img),
        "text": candidate['text'],  # caption is kept unchanged
    }

    # Write through a context manager so the file is flushed and closed
    # before the next cell reads it back.
    with open(f"{output_dir}/poisoned_dog_{i}.p", "wb") as poisoned_fh:
        pickle.dump(poisoned_data, poisoned_fh)

print(f"Saved {len(candidates)} poisoned images to {output_dir}")

    

In [None]:
# Display the saved poisoned images (at most 20) with their captions.
import pickle  # repeated from the validation cell so this cell runs on its own

# Sort the matches so the display order is stable; glob order is arbitrary.
poisoned_files = sorted(glob.glob(f"{output_dir}/poisoned_dog_*.p"))

# Load each file inside a `with` block so its handle is closed.
# NOTE: pickle can execute arbitrary code on load — only read files that were
# produced by this notebook.
poisoned_candidates = []
for poisoned_path in poisoned_files:
    with open(poisoned_path, "rb") as poisoned_fh:
        poisoned_candidates.append(pickle.load(poisoned_fh))

for poisoned_candidate in poisoned_candidates[:20]:
    img = Image.fromarray(poisoned_candidate['img'])
    display(img)
    print(f"Caption: {poisoned_candidate['text']}")
