In [1]:
import os

from tqdm import tqdm

import torch
from torchvision import transforms
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import FashionMNIST

# Extracting Flattened Features

In [2]:
# Build the train and test splits with identical preprocessing: convert each
# sample to a tensor, then normalize with mean 0.2860 and std 0.3530.
train_data, test_data = (
    FashionMNIST(
        root='./Data',
        download=True,
        train=is_train,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=0.2860, std=0.3530),
        ]),
    )
    for is_train in (True, False)
)

# Keyed by split name so the extraction loop can walk both splits.
data = dict(train=train_data, test=test_data)

In [3]:
# Flatten every image of each split into one row and save the result.
# Each split is stored as [features, labels] NumPy arrays.
# feature size should be nx(28*28)

save_path = './Data/Features/flattened_features.pt'
batch_size = 512

# Create the output directory up front so a missing folder fails immediately
# instead of after the whole extraction has already run.
os.makedirs(os.path.dirname(save_path), exist_ok=True)

features = {}
for key, value in data.items():
    print(f'\nExtracting {key} Features...')

    loader = DataLoader(value, batch_size, shuffle=False, num_workers=3)

    # Collect per-batch chunks and concatenate once at the end; growing a
    # running tensor with torch.cat on every batch re-copies all earlier rows.
    feature_chunks, label_chunks = [], []
    for images, labels in tqdm(loader):
        feature_chunks.append(torch.flatten(images, start_dim=1))
        label_chunks.append(labels)

    running_features = torch.cat(feature_chunks, dim=0)
    # Concatenating only the label batches keeps their original dtype; seeding
    # the accumulator with an empty float tensor would promote them to float.
    running_labels = torch.cat(label_chunks, dim=0)
    # Drop the per-batch pieces so they do not stay alive in the notebook namespace.
    del feature_chunks, label_chunks

    features[key] = [running_features.numpy(), running_labels.numpy()]

torch.save(features, save_path)
print("\nFeature Dict Saved to, ", save_path)


Extracting train Features...


  0%|          | 0/118 [00:00<?, ?it/s]

100%|██████████| 118/118 [00:05<00:00, 22.30it/s]



Extracting test Features...


100%|██████████| 20/20 [00:00<00:00, 29.92it/s]



Feature Dict Saved to,  ./Data/Features/flattened_features.pt


In [4]:
# Remove the flattened-feature section's names before the next section
# defines its own versions of them.
del (
    train_data, test_data, data,
    save_path, batch_size, features,
    loader, running_features, running_labels,
    images, labels,
)

# Extracting ResNet18 Features

In [5]:
from torchvision.models import ResNet18_Weights

# Build the train and test splits with identical preprocessing: each sample has
# `.convert('RGB')` called on it, then goes through the transform preset that
# ships with the ResNet18 IMAGENET1K_V1 weights.
train_data, test_data = (
    FashionMNIST(
        root='./Data',
        download=True,
        train=is_train,
        transform=transforms.Compose([
            transforms.Lambda(lambda img: img.convert('RGB')),
            ResNet18_Weights.IMAGENET1K_V1.transforms(antialias=True),
        ]),
    )
    for is_train in (True, False)
)

# Keyed by split name so the extraction loop can walk both splits.
data = dict(train=train_data, test=test_data)

In [6]:
from torchvision.models import resnet18

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = resnet18()
# Weights from https://download.pytorch.org/models/resnet18-f37072fd.pth
# map_location='cpu' makes loading independent of the device the checkpoint was
# saved from; weights_only=True restricts unpickling to tensors and plain
# containers instead of arbitrary Python objects.
model.load_state_dict(torch.load('./Data/Models/resnet18-f37072fd.pth',
                                 map_location='cpu', weights_only=True))
# Swap the final fully connected layer for a pass-through so the forward pass
# returns the features that would have fed it rather than class scores.
model.fc = torch.nn.Flatten()

# Evaluation mode, then move the parameters to the selected device.
model.eval()
model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [7]:
# Run every image of each split through the headless ResNet18 and save the
# outputs. Each split is stored as [features, labels] NumPy arrays.
# feature size should be nx512

save_path = './Data/Features/resnet18_features.pt'
batch_size = 256
num_workers = 3

# Create the output directory up front so a missing folder fails immediately
# instead of after the whole extraction has already run.
os.makedirs(os.path.dirname(save_path), exist_ok=True)

features = {}
for key, value in data.items():
    print(f'\nExtracting {key} Features...')

    loader = DataLoader(value, batch_size, num_workers=num_workers)

    # Collect per-batch chunks and concatenate once at the end; growing a
    # running tensor with torch.cat on every batch re-copies all earlier rows.
    feature_chunks, label_chunks = [], []
    # Gradients are not needed for feature extraction.
    with torch.no_grad():
        for images, labels in tqdm(loader):
            images = images.to(device)
            # Move each batch of outputs back to the CPU before storing it.
            feature_chunks.append(model(images).to('cpu'))
            label_chunks.append(labels)

    running_features = torch.cat(feature_chunks, dim=0)
    # Concatenating only the label batches keeps their original dtype; seeding
    # the accumulator with an empty float tensor would promote them to float.
    running_labels = torch.cat(label_chunks, dim=0)
    # Drop the per-batch pieces so they do not stay alive in the notebook namespace.
    del feature_chunks, label_chunks

    features[key] = [running_features.numpy(), running_labels.numpy()]

torch.save(features, save_path)
print("\nFeature Dict Saved to, ", save_path)


Extracting train Features...


100%|██████████| 235/235 [01:40<00:00,  2.35it/s]



Extracting test Features...


100%|██████████| 40/40 [00:16<00:00,  2.35it/s]



Feature Dict Saved to,  ./Data/Features/resnet18_features.pt


In [12]:
# Remove the ResNet18 section's names before the next section defines its own.
# `model` is included so the ResNet18 network is released too; the following
# section constructs a new `model` before using it.
del (
    train_data, test_data, data,
    device, model,
    save_path, batch_size, num_workers, features,
    loader, running_features, running_labels,
    images, labels,
)

# Extracting ViT Features

In [2]:
from torchvision.models import ViT_B_16_Weights

# Build the train and test splits with identical preprocessing: each sample has
# `.convert('RGB')` called on it, then goes through the transform preset that
# ships with the ViT-B/16 IMAGENET1K_V1 weights.
train_data, test_data = (
    FashionMNIST(
        root='./Data',
        download=True,
        train=is_train,
        transform=transforms.Compose([
            transforms.Lambda(lambda img: img.convert('RGB')),
            ViT_B_16_Weights.IMAGENET1K_V1.transforms(antialias=True),
        ]),
    )
    for is_train in (True, False)
)

# Keyed by split name so the extraction loop can walk both splits.
data = dict(train=train_data, test=test_data)

In [3]:
from torchvision.models import vit_b_16

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = vit_b_16()
# Weights from https://download.pytorch.org/models/vit_b_16-c867db91.pth
# map_location='cpu' makes loading independent of the device the checkpoint was
# saved from; weights_only=True restricts unpickling to tensors and plain
# containers instead of arbitrary Python objects.
model.load_state_dict(torch.load('./Data/Models/vit_b_16-c867db91.pth',
                                 map_location='cpu', weights_only=True))
# Swap the classification heads for a pass-through so the forward pass returns
# the features that would have fed them rather than class scores.
model.heads = torch.nn.Flatten()

# Evaluation mode, then move the parameters to the selected device.
model.eval()
model.to(device)

VisionTransformer(
  (conv_proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
  (encoder): Encoder(
    (dropout): Dropout(p=0.0, inplace=False)
    (layers): Sequential(
      (encoder_layer_0): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_attention): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
        )
        (dropout): Dropout(p=0.0, inplace=False)
        (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): MLPBlock(
          (0): Linear(in_features=768, out_features=3072, bias=True)
          (1): GELU(approximate='none')
          (2): Dropout(p=0.0, inplace=False)
          (3): Linear(in_features=3072, out_features=768, bias=True)
          (4): Dropout(p=0.0, inplace=False)
        )
      )
      (encoder_layer_1): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_a

In [4]:
# Run every image of each split through the headless ViT-B/16 and save the
# outputs. Each split is stored as [features, labels] NumPy arrays.
# feature size should be nx768

save_path = './Data/Features/vit_b_16_features.pt'
batch_size = 256
num_workers = 2

# Create the output directory up front so a missing folder fails immediately
# instead of after the whole extraction has already run.
os.makedirs(os.path.dirname(save_path), exist_ok=True)

features = {}
for key, value in data.items():
    print(f'\nExtracting {key} Features...')

    loader = DataLoader(value, batch_size, num_workers=num_workers)

    # Collect per-batch chunks and concatenate once at the end; growing a
    # running tensor with torch.cat on every batch re-copies all earlier rows.
    feature_chunks, label_chunks = [], []
    # Gradients are not needed for feature extraction.
    with torch.no_grad():
        for images, labels in tqdm(loader):
            images = images.to(device)
            # Move each batch of outputs back to the CPU before storing it.
            feature_chunks.append(model(images).to('cpu'))
            label_chunks.append(labels)

    running_features = torch.cat(feature_chunks, dim=0)
    # Concatenating only the label batches keeps their original dtype; seeding
    # the accumulator with an empty float tensor would promote them to float.
    running_labels = torch.cat(label_chunks, dim=0)
    # Drop the per-batch pieces so they do not stay alive in the notebook namespace.
    del feature_chunks, label_chunks

    features[key] = [running_features.numpy(), running_labels.numpy()]

torch.save(features, save_path)
print("\nFeature Dict Saved to, ", save_path)


Extracting train Features...


100%|██████████| 235/235 [23:16<00:00,  5.94s/it]



Extracting test Features...


100%|██████████| 40/40 [05:38<00:00,  8.47s/it]



Feature Dict Saved to,  ./Data/Features/vit_b_16_features.pt


In [5]:
# Remove the ViT section's working names from the notebook namespace.
# NOTE(review): `model` is not in this list, so the network object stays alive
# after this cell - confirm whether that is intended.
del (
    train_data, test_data, data,
    device,
    save_path, batch_size, num_workers, features,
    loader, running_features, running_labels,
    images, labels,
)