In [None]:
! pip install transformers

In [None]:
import random
import torch
import numpy as np

from google.colab import drive
from PIL import Image
from os import path
from tqdm import tqdm
from transformers import ViTFeatureExtractor, ViTForImageClassification

In [None]:
# Mount Google Drive at /content/drive; the image directory and the output
# .npy file used by later cells both live under this mount point.
drive.mount('/content/drive')

In [None]:
# Seed every random number generator used in this notebook with one shared value.
SEED = 42

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
# Directory on the mounted Drive that holds the numbered .jpg images.
IMG_PATH = '/content/drive/My Drive/CLIP_Facial_Impressions/omi/images'

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"

In [None]:
# The preprocessor and the model weights are loaded from the same checkpoint,
# so the identifier is spelled once and shared.
VIT_CHECKPOINT = 'google/vit-large-patch32-384'

feature_extractor = ViTFeatureExtractor.from_pretrained(VIT_CHECKPOINT)
model = ViTForImageClassification.from_pretrained(VIT_CHECKPOINT)
model = model.to(device)  # move the weights to the selected device

In [None]:
# Full paths of the images named 1.jpg through 1004.jpg inside IMG_PATH
# (range's upper bound of 1005 is exclusive).
first_impression_images = list(
  path.join(IMG_PATH, f'{image_id}.jpg')
  for image_id in range(1, 1005)
)

In [None]:
# Embed every image with the ViT: one tensor per image is appended to `vecs`,
# in the same order as `first_impression_images`.
vecs = []

for img in tqdm(first_impression_images):
  # `img` is already a full path (it was joined with IMG_PATH when the list was
  # built), so it is opened directly instead of being joined a second time.
  # The context manager releases the file handle, and convert("RGB") guarantees
  # a three-channel input even for grayscale, RGBA or CMYK files.
  with Image.open(img) as image_file:
    image = image_file.convert("RGB")
  inputs = feature_extractor(images=image, return_tensors="pt").to(device)

  # Inference only: no gradients are needed.
  with torch.no_grad():
    outputs = model(**inputs, output_hidden_states=True)
    # Average the last entry of `hidden_states` over dim 1
    # (presumably the token/patch axis -- TODO confirm against the model docs).
    vec = outputs.hidden_states[-1].mean(dim=1)
    vecs.append(vec)

In [None]:
# Stack the per-image tensors along a new leading dimension, drop dimension 1
# if it has size 1, and move the result to host (CPU) memory.
final_vecs = torch.stack(vecs).squeeze(1).cpu()

In [None]:
# Convert the CPU tensor to a NumPy array and write it to a .npy file on the
# mounted Drive.
np_vecs = final_vecs.numpy()
np.save(
  file='/content/drive/My Drive/CLIP_Facial_Impressions/vit_vecs.npy',
  arr=np_vecs,
)