# Nightshade Penalty Method Validation and Analysis

## Imports

In [None]:
# Package imports
import torch
import numpy as np
from PIL import Image
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from diffusers import StableDiffusionPipeline
from IPython.display import display
import time
import pandas as pd
import seaborn as sns
import clip

# Our custom implementations
from nightshade import Nightshade
from perturbation_methods import fgsm_penalty, pgd_penalty, nightshade_penalty

: 

## Configuration

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Concept handed to every Nightshade instance as `target_concept`.
target_concept = 'cat'
# Handed to every Nightshade instance as `epsilon`
# (presumably the perturbation budget; confirm against nightshade.py).
epsilon = 0.1
# Image file used by all validation tests.
test_image_path = 'test_images/dog.jpg'

# Upper-case aliases (PEP 8 constant spelling) so that either spelling
# resolves to the same object in downstream cells.
DEVICE = device
TARGET_CONCEPT = target_concept


## Initialize Nightshade

In [None]:
# Label -> penalty function for each perturbation method under comparison.
methods = dict(
    fgsm=fgsm_penalty,
    pgd=pgd_penalty,
    nightshade=nightshade_penalty,
)

# One Nightshade instance per method; all of them share the same target
# concept, device and epsilon so that only the penalty function differs.
nightshade_instances = {
    label: Nightshade(
        target_concept=target_concept,
        device=device,
        epsilon=epsilon,
        method=penalty_fn,
    )
    for label, penalty_fn in methods.items()
}

In [None]:
def load_and_prepare_image(path, size=512):
    """Read an image file and return it as a batched tensor on `device`.

    The image is converted to RGB, passed through Resize(size) and
    CenterCrop(size), turned into a tensor and normalized with mean 0.5 and
    std 0.5. A leading batch dimension is added before the tensor is moved
    to the module-level `device`.

    Args:
        path: Location of the image file to open.
        size: Value given to both Resize and CenterCrop (default 512).

    Returns:
        The transformed image with a leading batch dimension, on `device`.
    """
    pipeline = transforms.Compose([
        transforms.Resize(size),
        transforms.CenterCrop(size),
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5]),
    ])
    rgb_image = Image.open(path).convert("RGB")
    batched = pipeline(rgb_image).unsqueeze(0)
    return batched.to(device)


# Reference tensor of the unperturbed test image, reused by later cells.
original_img = load_and_prepare_image(test_image_path)

def visualize_results(original, perturbed, method_name):
    """Plot the original, the perturbed image and their amplified difference.

    Args:
        original: Image tensor of the unperturbed image.
        perturbed: Image tensor of the perturbed image.
        method_name: Label used in the title of the middle panel.

    Both tensors are squeezed, moved to the CPU, transposed with (1, 2, 0)
    and mapped back with ``x * 0.5 + 0.5`` before plotting, so they are
    assumed to be single-image, channel-first tensors normalized with
    mean 0.5 / std 0.5.
    """
    def to_displayable(batch):
        # Drop singleton dims, put channels last, undo the normalization.
        channels_last = batch.squeeze().cpu().numpy().transpose(1, 2, 0)
        return (channels_last * 0.5 + 0.5).clip(0, 1)

    fig, axes = plt.subplots(1, 3, figsize=(15, 5))

    before = to_displayable(original)
    after = to_displayable(perturbed)
    # Absolute pixel difference, scaled 10x so small perturbations are visible.
    amplified = (np.abs(after - before) * 10).clip(0, 1)

    panels = (
        (before, "Original Image"),
        (after, f"{method_name} Perturbed"),
        (amplified, "Difference (10x)"),
    )
    for axis, (picture, title) in zip(axes, panels):
        axis.imshow(picture)
        axis.set_title(title)
        axis.axis('off')

    plt.tight_layout()
    plt.show()

In [None]:
print("## Validation Test 1: Basic Functionality")

# RGB image handed to each method's generate() call; reused by later cells.
test_img = Image.open(test_image_path).convert("RGB")

for name, instance in nightshade_instances.items():
    print(f"\nTesting {name} method...")
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()
    perturbed_img = instance.generate(test_img)
    elapsed = time.perf_counter() - start_time

    print(f"Completed in {elapsed:.2f} seconds")
    display(perturbed_img)

    # Convert back to a batched tensor for visualization. Uses `device`, the
    # name defined in the configuration cell.
    perturbed_tensor = instance.transform(perturbed_img).unsqueeze(0).to(device)
    visualize_results(original_img, perturbed_tensor, name)

In [None]:
print("## Validation Test 2: Latent Space Manipulation")

def compare_latents(original, perturbed, nightshade_instance):
    """Measure how far apart two inputs are in the instance's latent space.

    Args:
        original: Input passed to ``nightshade_instance.get_latent``.
        perturbed: Second input passed to the same method.
        nightshade_instance: Object exposing ``get_latent``.

    Returns:
        A ``(distance, cosine_sim)`` tuple of Python floats: the norm of the
        latent difference and the cosine similarity of the flattened latents.
    """
    encode = nightshade_instance.get_latent
    # Latents are only compared, never optimized, so skip autograd tracking.
    with torch.no_grad():
        latent_before = encode(original)
        latent_after = encode(perturbed)

    gap = (latent_before - latent_after).norm().item()
    similarity = torch.nn.functional.cosine_similarity(
        latent_before.reshape(-1),
        latent_after.reshape(-1),
        dim=0,
    )
    return gap, similarity.item()

# One row of latent-space metrics per perturbation method.
results = []
for name, instance in nightshade_instances.items():
    perturbed_img = instance.generate(test_img)
    # Uses `device`, the name defined in the configuration cell.
    perturbed_tensor = instance.transform(perturbed_img).unsqueeze(0).to(device)

    dist, sim = compare_latents(original_img, perturbed_tensor, instance)
    results.append({
        "Method": name,
        "Latent Distance": dist,
        "Cosine Similarity": sim
    })

results_df = pd.DataFrame(results)
print("\nLatent Space Comparison:")
display(results_df)

In [None]:
print("## Validation Test 3: Effectiveness Against Model")

def test_model_misclassification(original, perturbed, target_concept):
    """Score how strongly CLIP associates each image with the target concept.

    Args:
        original: Unperturbed image, either a PIL image or an image tensor
            shaped (C, H, W) or (1, C, H, W).
        perturbed: Perturbed image, in the same formats as `original`.
        target_concept: Word inserted into the prompt
            "a photo of a {target_concept}".

    Returns:
        A dict with three floats:
        "Original Confidence" and "Perturbed Confidence" are the softmax
        probability of the target-concept prompt among the three prompts,
        and "Confidence Increase" is perturbed minus original.
    """
    def _as_pil(image):
        # CLIP's preprocess pipeline works on PIL images, so tensors have to
        # be converted back before they are handed to it.
        if not isinstance(image, torch.Tensor):
            return image
        chw = image.detach().cpu().float()
        if chw.dim() == 4:
            chw = chw[0]  # drop the batch dimension
        # Assumes mean=0.5 / std=0.5 normalization, as applied by
        # load_and_prepare_image; confirm for tensors from other transforms.
        return transforms.ToPILImage()((chw * 0.5 + 0.5).clamp(0, 1))

    # Load CLIP model for evaluation
    model, preprocess = clip.load("ViT-B/32", device=device)

    # Preprocess images
    orig_preprocessed = preprocess(_as_pil(original)).unsqueeze(0).to(device)
    pert_preprocessed = preprocess(_as_pil(perturbed)).unsqueeze(0).to(device)

    # Text prompts; index 0 is the target-concept prompt read out below.
    text_inputs = clip.tokenize([
        f"a photo of a {target_concept}",
        "a photo of an artwork",
        "a photo of the original content"
    ]).to(device)

    with torch.no_grad():
        orig_features = model.encode_image(orig_preprocessed)
        pert_features = model.encode_image(pert_preprocessed)
        text_features = model.encode_text(text_inputs)

        # Unit-normalize so the dot products are cosine similarities; without
        # this the softmax is driven by embedding magnitudes.
        orig_features = orig_features / orig_features.norm(dim=-1, keepdim=True)
        pert_features = pert_features / pert_features.norm(dim=-1, keepdim=True)
        text_features = text_features / text_features.norm(dim=-1, keepdim=True)

        # 100x is the logit scale used in CLIP's reference zero-shot example.
        orig_similarities = (100.0 * orig_features @ text_features.T).softmax(dim=-1)
        pert_similarities = (100.0 * pert_features @ text_features.T).softmax(dim=-1)

    orig_confidence = orig_similarities[0][0].item()
    pert_confidence = pert_similarities[0][0].item()
    return {
        "Original Confidence": orig_confidence,
        "Perturbed Confidence": pert_confidence,
        "Confidence Increase": pert_confidence - orig_confidence
    }

# Run evaluation: one row of CLIP confidence metrics per perturbation method.
evaluation_results = []
for name, instance in nightshade_instances.items():
    perturbed_img = instance.generate(test_img)
    # Uses `device` and `target_concept`, the names defined in the
    # configuration cell.
    perturbed_tensor = instance.transform(perturbed_img).unsqueeze(0).to(device)

    metrics = test_model_misclassification(
        original_img,
        perturbed_tensor,
        target_concept
    )
    metrics["Method"] = name
    evaluation_results.append(metrics)

evaluation_df = pd.DataFrame(evaluation_results)
print("\nMisclassification Effectiveness:")
display(evaluation_df)