### Invariance property of CNNs

In [10]:
import torch
import torch.nn.functional as F
from torchvision import models, transforms, datasets
from torchvision.transforms import functional as TF
from torch.utils.data import DataLoader
import pandas as pd
from tqdm import tqdm
import os

device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print("Using device:", device)

# ----- 1. Load pretrained models -----
torch.hub.set_dir(os.path.abspath("resnet50"))
resnet50 = models.resnet50(pretrained=True).to(device).eval()

torch.hub.set_dir(os.path.abspath("alexnet"))
alexnet = models.alexnet(pretrained=True).to(device).eval()
models_dict = {'ResNet50': resnet50, 'AlexNet': alexnet}

# ----- 2. Dataset (100 random synthetic images) -----
base_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# ----- 2. Dataset (CIFAR-100, only 100 images) -----
cifar_root = os.path.abspath("./cifar100_data")

# Download CIFAR-100 dataset into current directory if not present
full_dataset = datasets.CIFAR100(
    root=cifar_root,
    train=False,          # use test split for convenience
    download=False,
    transform=base_transform
)

# Take only first 100 samples
subset_indices = list(range(100))
dataset = torch.utils.data.Subset(full_dataset, subset_indices)

# DataLoader
dataloader = DataLoader(dataset, batch_size=10, shuffle=False)

# ----- 3. Extended Transformations -----
def get_transform(name):
    """
    Each transform operates on a PIL image, and we reapply ToTensor + Normalize at the end.
    The bug in your code: noise transform was being applied on PIL Image â€” now fixed.
    """
    aug = None
    if name == "small_shift":
        aug = transforms.RandomAffine(degrees=0, translate=(0.1, 0.1))
    elif name == "flip":
        aug = transforms.RandomHorizontalFlip(p=1.0)
    elif name == "noise":
        # Noise must be added on tensor, not PIL
        def add_noise(x):
            x = TF.to_tensor(x)
            x = torch.clamp(x + 0.01 * torch.randn_like(x), 0, 1)
            return x
        return transforms.Compose([
            transforms.Lambda(add_noise),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
    elif name.startswith("rotate_"):
        deg = int(name.split("_")[1])
        aug = transforms.RandomRotation(degrees=deg)
    elif name.startswith("scale_"):
        scale = float(name.split("_")[1])
        aug = transforms.Compose([
            transforms.Resize(int(224 * scale)),
            transforms.CenterCrop(224)
        ])
    else:
        aug = transforms.Lambda(lambda x: x)

    # âœ… Always reapply ToTensor and Normalize for model input
    return transforms.Compose([
        aug,
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

transform_names = [
    "small_shift", "flip", "noise",
    "rotate_10", "rotate_20", "rotate_30", "rotate_45",
    "scale_0.8", "scale_1.2", "scale_1.5"
]

# ----- 4. Invariance Evaluation -----
@torch.no_grad()
def invariance_score(model, loader, transform_name):
    transform = get_transform(transform_name)
    total_cosine = 0.0
    count = 0

    for imgs, _ in tqdm(loader, desc=f"{transform_name}"):
        transformed_imgs = []
        for img in imgs:
            # Convert normalized tensor -> unnormalized PIL
            img = img.cpu() * torch.tensor([0.229, 0.224, 0.225]).view(3,1,1)
            img = img + torch.tensor([0.485, 0.456, 0.406]).view(3,1,1)
            img_pil = TF.to_pil_image(img.clamp(0,1))
            transformed_imgs.append(transform(img_pil))

        imgs_t = torch.stack(transformed_imgs)
        imgs, imgs_t = imgs.to(device), imgs_t.to(device)

        out1 = F.softmax(model(imgs), dim=1)
        out2 = F.softmax(model(imgs_t), dim=1)

        cos = F.cosine_similarity(out1, out2).mean().item()
        total_cosine += cos
        count += 1

    return total_cosine / count if count > 0 else 0.0

# ----- 5. Run all experiments -----
results = []
for name, model in models_dict.items():
    row = {'Model': name}
    for tname in transform_names:
        score = invariance_score(model, dataloader, tname)
        row[tname] = round(score * 100, 2)
        print(f"{name} - {tname}: {row[tname]}% similarity")
    results.append(row)

# ----- 6. Summary -----
df = pd.DataFrame(results)
print("\nðŸ“Š Invariance Similarity Summary (Cosine %):\n")


Using device: mps


small_shift: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 10/10 [00:01<00:00,  6.24it/s]


ResNet50 - small_shift: 80.71% similarity


flip: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 10/10 [00:01<00:00,  6.31it/s]


ResNet50 - flip: 88.25% similarity


noise: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 10/10 [00:01<00:00,  5.80it/s]


ResNet50 - noise: 42.16% similarity


rotate_10: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 10/10 [00:01<00:00,  6.27it/s]


ResNet50 - rotate_10: 44.19% similarity


rotate_20: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 10/10 [00:01<00:00,  6.24it/s]


ResNet50 - rotate_20: 30.94% similarity


rotate_30: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 10/10 [00:01<00:00,  6.14it/s]


ResNet50 - rotate_30: 22.7% similarity


rotate_45: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 10/10 [00:01<00:00,  6.23it/s]


ResNet50 - rotate_45: 13.32% similarity


scale_0.8: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 10/10 [00:01<00:00,  6.20it/s]


ResNet50 - scale_0.8: 46.68% similarity


scale_1.2: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 10/10 [00:01<00:00,  6.12it/s]


ResNet50 - scale_1.2: 80.21% similarity


scale_1.5: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 10/10 [00:01<00:00,  6.09it/s]


ResNet50 - scale_1.5: 48.49% similarity


small_shift: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 10/10 [00:00<00:00, 27.03it/s]


AlexNet - small_shift: 62.97% similarity


flip: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 10/10 [00:00<00:00, 28.24it/s]


AlexNet - flip: 90.07% similarity


noise: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 10/10 [00:00<00:00, 21.60it/s]


AlexNet - noise: 54.07% similarity


rotate_10: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 10/10 [00:00<00:00, 28.49it/s]


AlexNet - rotate_10: 45.89% similarity


rotate_20: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 10/10 [00:00<00:00, 28.20it/s]


AlexNet - rotate_20: 31.44% similarity


rotate_30: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 10/10 [00:00<00:00, 28.55it/s]


AlexNet - rotate_30: 21.61% similarity


rotate_45: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 10/10 [00:00<00:00, 26.98it/s]


AlexNet - rotate_45: 19.71% similarity


scale_0.8: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 10/10 [00:00<00:00, 27.08it/s]


AlexNet - scale_0.8: 16.75% similarity


scale_1.2: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 10/10 [00:00<00:00, 26.76it/s]


AlexNet - scale_1.2: 79.66% similarity


scale_1.5: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 10/10 [00:00<00:00, 26.06it/s]

AlexNet - scale_1.5: 60.23% similarity

ðŸ“Š Invariance Similarity Summary (Cosine %):






In [11]:
df.head(5)

Unnamed: 0,Model,small_shift,flip,noise,rotate_10,rotate_20,rotate_30,rotate_45,scale_0.8,scale_1.2,scale_1.5
0,ResNet50,80.71,88.25,42.16,44.19,30.94,22.7,13.32,46.68,80.21,48.49
1,AlexNet,62.97,90.07,54.07,45.89,31.44,21.61,19.71,16.75,79.66,60.23
