In [1]:
import torch
from torch.utils.data import Dataset
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.models import vit_b_32
from sklearn.model_selection import train_test_split
import torch.nn as nn
import pandas as pd
import numpy as np
import os
from PIL import Image
import torch.optim as optim
import pickle

In [2]:
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'
print(f"device: {device}")

device: cuda


In [3]:
topicModel = 'LDA' # LDA or NMF

In [4]:
with open(f'./data/processed/{topicModel}_train.pkl', 'rb') as f:
    train_dataset = pickle.load(f) 
with open(f'./data/processed/{topicModel}_val.pkl', 'rb') as f:
    val_dataset = pickle.load(f) 
with open(f'./data/processed/{topicModel}_test.pkl', 'rb') as f:
    test_dataset = pickle.load(f) 

In [5]:
class CustomDataset(Dataset):
    def __init__(self, data_array, transform=None):
        self.data_array = data_array
        self.transform = transform

    def __len__(self):
        return len(self.data_array)

    def __getitem__(self, index):
        image_path, label, image = self.data_array[index]

        if self.transform:
            image = self.transform(image)
        if isinstance(label, str):
            label = int(label)
        
        return image_path, label, image
    
train_dataset = CustomDataset(train_dataset, transform=None)
val_dataset = CustomDataset(val_dataset, transform=None)
test_dataset = CustomDataset(test_dataset, transform=None)

In [6]:
batchSize = 1024
train_loader = DataLoader(train_dataset, batch_size=batchSize, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=batchSize, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batchSize, shuffle=False)

In [7]:
model = vit_b_32(weights='IMAGENET1K_V1').to(device)
print(model)

VisionTransformer(
  (conv_proj): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32))
  (encoder): Encoder(
    (dropout): Dropout(p=0.0, inplace=False)
    (layers): Sequential(
      (encoder_layer_0): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_attention): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
        )
        (dropout): Dropout(p=0.0, inplace=False)
        (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): MLPBlock(
          (0): Linear(in_features=768, out_features=3072, bias=True)
          (1): GELU(approximate='none')
          (2): Dropout(p=0.0, inplace=False)
          (3): Linear(in_features=3072, out_features=768, bias=True)
          (4): Dropout(p=0.0, inplace=False)
        )
      )
      (encoder_layer_1): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_a

In [8]:
features = []
labels_train = []
imagePath_train = []
def capture_input(module, input, output):
    # input[0] contains the input tensor to the FC layer for the entire batch
    for i in range(input[0].shape[0]):
        features.append(input[0][i].detach().cpu().numpy())

# Define the hook
handle = model.heads.head.register_forward_hook(capture_input)

model.eval()
with torch.no_grad():
    for i, (imagePaths, labels, inputs) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        _ = model(inputs)
        labels_train.extend(labels.detach().cpu().numpy().tolist())
        imagePath_train.extend(list(imagePaths))
        print('finished ' + str((i+1)*batchSize))

features_train = np.array(features)
np.savez('./data/interim/model1_features_train_' + topicModel + '.npz', features_train=features_train, labels_train=labels_train, imagePath_train=imagePath_train)
# # Remove the hook
# handle.remove()

  return torch._native_multi_head_attention(


finished 1024
finished 2048
finished 3072
finished 4096
finished 5120
finished 6144
finished 7168
finished 8192
finished 9216
finished 10240
finished 11264
finished 12288
finished 13312
finished 14336
finished 15360
finished 16384
finished 17408
finished 18432
finished 19456
finished 20480
finished 21504
finished 22528
finished 23552
finished 24576
finished 25600
finished 26624
finished 27648
finished 28672
finished 29696
finished 30720
finished 31744
finished 32768
finished 33792


In [9]:
features = []
labels_val = []
imagePath_val = []

model.eval()
with torch.no_grad():
    for i, (imagePaths, labels, inputs) in enumerate(val_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        _ = model(inputs)
        labels_val.extend(labels.detach().cpu().numpy().tolist())
        imagePath_val.extend(list(imagePaths))
        print('finished ' + str((i+1)*batchSize))

features_val = np.array(features)
np.savez('./data/interim/model1_features_val_' + topicModel + '.npz', features_val=features_val, labels_test=labels_val,imagePath_test=imagePath_val)

finished 1024
finished 2048
finished 3072
finished 4096
finished 5120
finished 6144
finished 7168
finished 8192
finished 9216


In [10]:
features = []
labels_test = []
imagePath_test = []

model.eval()
with torch.no_grad():
    for i, (imagePaths, labels, inputs) in enumerate(test_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        _ = model(inputs)
        labels_test.extend(labels.detach().cpu().numpy().tolist())
        imagePath_test.extend(list(imagePaths))
        print('finished ' + str((i+1)*batchSize))

features_test = np.array(features)
np.savez('./data/interim/model1_features_test_' + topicModel + '.npz', features_test=features_test, labels_test=labels_test,imagePath_test=imagePath_test)

finished 1024
finished 2048
finished 3072
finished 4096
finished 5120
finished 6144
finished 7168
finished 8192


In [11]:
features_train.shape, features_val.shape, features_test.shape

((33687, 768), (8422, 768), (7431, 768))