In [3]:
#Imports

import numpy as np
import scipy.io as sio

from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
import matplotlib.pyplot as plt
import os

import torch
from torchvision import transforms
from PIL import Image

import torch.nn as nn

from transformers import SamModel, SamProcessor, CLIPVisionModel
import time

In [None]:
# Load the MATLAB file and pull out the raw arrays.
data = sio.loadmat('dataset')

# Labels as a single column.
y = np.array(data['label'].reshape(-1, 1))

# View the data as 51 x 51 x 51 x 5900, then move the 5900-long axis to the
# front so that x[i] is one 51 x 51 x 51 block.
x = data['data'].reshape((51, 51, 51, 5900)).transpose(3, 0, 1, 2)

# Index-0 slice along each of the three trailing axes: one 51 x 51 image per
# block and per axis.
xx_bin = x[:, 0]
xy_bin = x[:, :, 0]
xz_bin = x[..., 0]

In [None]:
# Extracting image-level features using DINOv2 models

# Only step applied to the PIL image before the model: conversion to a tensor.
preprocess = transforms.Compose([transforms.ToTensor()])

def make_features(img):
    """Return the DINOv2 image-level embedding of one 2-D image slice.

    The slice is cropped from its top-left corner so that both sides are
    multiples of 14 (the model works on 14x14 patches), multiplied by 255,
    turned into a 3-channel PIL image and converted to a tensor with the
    module-level ``preprocess`` transform before the forward pass.

    Parameters
    ----------
    img : numpy.ndarray
        One 2-D slice.  NOTE(review): the ``* 255`` scaling presumably
        expects values in [0, 1] -- confirm against the dataset.

    Returns
    -------
    numpy.ndarray
        The model output for a batch of one image; the leading batch axis of
        length 1 is kept.

    Notes
    -----
    Reads the global ``dinov2_vitb14``; despite its name it holds whichever
    backbone size the extraction loop has currently loaded.
    """
    patch = 14

    # Largest multiples of the patch size that fit inside the slice.
    rows = (img.shape[0] // patch) * patch
    cols = (img.shape[1] // patch) * patch
    img_cropped = img[:rows, :cols]

    # 2-D array -> PIL image -> 3-channel RGB (channels are replicated).
    img_rgb = Image.fromarray(img_cropped * 255).convert('RGB')

    # Tensor conversion, then add the batch axis the model expects.
    img_t = preprocess(img_rgb).unsqueeze(0)

    # For patch-level tokens use instead:
    #   dinov2_vitb14.forward_features(img_t)['x_norm_patchtokens']

    # Inference only: skip autograd bookkeeping so no graph is built (and
    # kept alive) for each of the thousands of forward passes.
    with torch.no_grad():
        return dinov2_vitb14(img_t).detach().numpy()


# Extract DINOv2 features for every slice stack and every backbone size,
# saving one .npy file per (stack, size) pair.

dino_out_dir = '11_12_Digital_Features'
# np.save does not create missing directories; create the target up front so
# a long extraction run cannot fail at the very end.
os.makedirs(dino_out_dir, exist_ok=True)

progress_every = 500  # print a progress line every this many images

for ver in ['s', 'b', 'l', 'g']:

    start_time = time.time()

    # The name says "vitb14" but it holds the backbone of the current size;
    # it is kept because make_features reads this global.
    dinov2_vitb14 = torch.hub.load('facebookresearch/dinov2', 'dinov2_vit{}14'.format(ver))
    dinov2_vitb14.eval()  # inference only

    features_by_plane = {}
    for plane, slices in (('xx', xx_bin), ('xy', xy_bin), ('xz', xz_bin)):
        plane_features = []
        # The loop variable is deliberately not called `x`, so the dataset
        # array `x` from the loading cell is not overwritten.
        for count, img_slice in enumerate(slices):
            plane_features.append(make_features(img_slice))
            if count % progress_every == 0:
                print(plane, count)

        features_by_plane[plane] = np.array(plane_features)
        new_filename = os.path.join(dino_out_dir, plane + '_dino_' + ver + '.npy')
        np.save(new_filename, features_by_plane[plane])

    # Keep the names earlier versions of this cell left behind.
    xx_features = features_by_plane['xx']
    xy_features = features_by_plane['xy']
    xz_features = features_by_plane['xz']

    print('Done with {}.npy'.format(ver))

    print("--- %s seconds ---" % (time.time() - start_time))


In [None]:
# Extracting image-level features using CLIP models

# Tensor conversion only. A transforms.Resize((224, 224)) step is left out on
# purpose here; make_clip_features brings the image to 224 x 224 itself.
preprocess_CLIP = transforms.Compose([transforms.ToTensor()])

def make_clip_features(img, ver):
    """Return the pooled CLIP vision embedding of one 2-D image slice.

    Every pixel is replicated into a 5x5 block (``np.kron`` with a block of
    ones), the result is cropped to its top-left 224x224 region, multiplied
    by 255, turned into a 3-channel PIL image and converted to a tensor with
    the module-level ``preprocess_CLIP`` transform.  For the 51x51 slices
    built in the data cell the upsampled image is 255x255, so the crop drops
    the right and bottom margins.

    Parameters
    ----------
    img : numpy.ndarray
        One 2-D slice.  NOTE(review): the ``* 255`` scaling presumably
        expects values in [0, 1] -- confirm against the dataset.
    ver : str
        Model variant tag passed by the caller.  Currently unused; the model
        is taken from the global ``clip_model``.

    Returns
    -------
    numpy.ndarray
        ``pooler_output`` of the model with length-1 axes squeezed out.
    """
    # Pixel replication by a factor of 5 in both directions, then crop.
    upsampled = np.kron(img, np.ones((5, 5)))
    upsampled = upsampled[:224, :224]

    img_rgb = Image.fromarray(upsampled * 255).convert("RGB")

    # Tensor conversion, then add the batch axis the model expects.
    img_t = preprocess_CLIP(img_rgb).unsqueeze(0)

    # Inference only: skip autograd bookkeeping so no graph is built for
    # each of the thousands of forward passes.
    with torch.no_grad():
        outputs = clip_model(img_t)

    return np.squeeze(outputs.pooler_output.detach().numpy())

In [None]:


# Extract CLIP features for every slice stack and every model variant,
# saving one .npy file per (stack, variant) pair.

clip_out_dir = '11_12_Digital_Features'
# np.save does not create missing directories; create the target up front so
# a long extraction run cannot fail at the very end.
os.makedirs(clip_out_dir, exist_ok=True)

clip_progress_every = 500  # print a progress line every this many images

for ver in ['b16', 'b32', 'l14']:
    start_time = time.time()

    # First character selects the model size, the rest is the patch size.
    if ver[0] == 'b':
        clip_model = CLIPVisionModel.from_pretrained('openai/clip-vit-base-patch{}'.format(ver[1:]))
    else:
        clip_model = CLIPVisionModel.from_pretrained('openai/clip-vit-large-patch{}'.format(ver[1:]))

    features_by_plane = {}
    for plane, slices in (('xx', xx_bin), ('xy', xy_bin), ('xz', xz_bin)):
        plane_features = []
        # The loop variable is deliberately not called `x`, so the dataset
        # array `x` from the loading cell is not overwritten.
        for count, img_slice in enumerate(slices):
            plane_features.append(make_clip_features(img_slice, ver))
            if count % clip_progress_every == 0:
                print(plane, count)

        features_by_plane[plane] = np.array(plane_features)
        new_filename = os.path.join(clip_out_dir, plane + '_clip_' + ver + '.npy')
        np.save(new_filename, features_by_plane[plane])

    # Keep the names earlier versions of this cell left behind.
    xx_features = features_by_plane['xx']
    xy_features = features_by_plane['xy']
    xz_features = features_by_plane['xz']

    print('Done with {}.npy'.format(ver))

    print("--- %s seconds ---" % (time.time() - start_time))

In [7]:
# Load the SAM model and its processor

sam_checkpoint = "facebook/sam-vit-huge"

# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

model = SamModel.from_pretrained(sam_checkpoint)
model = model.to(device)
processor = SamProcessor.from_pretrained(sam_checkpoint)

In [None]:
# Extracting image-level features using the SAM model

# Tensor conversion only. A transforms.Resize((1024, 1024)) step is left out
# on purpose here; make_sam_features brings the image to 1024 x 1024 itself.
preprocess_SAM = transforms.Compose([transforms.ToTensor()])


def make_sam_features(img):
    """Return a global-average-pooled SAM image embedding for one 2-D slice.

    Every pixel is replicated into a 21x21 block (``np.kron`` with a block of
    ones), the result is cropped to its top-left 1024x1024 region, multiplied
    by 255, turned into a 3-channel PIL image and converted to a tensor with
    the module-level ``preprocess_SAM`` transform.  For the 51x51 slices
    built in the data cell the upsampled image is 1071x1071, so the crop
    drops the right and bottom margins.  The image embedding returned by the
    global ``model`` is then averaged over its two spatial axes.

    Parameters
    ----------
    img : numpy.ndarray
        One 2-D slice.  NOTE(review): the ``* 255`` scaling presumably
        expects values in [0, 1] -- confirm against the dataset.

    Returns
    -------
    numpy.ndarray
        Pooled embedding on the CPU with length-1 axes squeezed out.
    """
    # Pixel replication by a factor of 21 in both directions, then crop.
    upsampled = np.kron(img, np.ones((21, 21)))
    upsampled = upsampled[:1024, :1024]

    img_rgb = Image.fromarray(upsampled * 255).convert("RGB")

    # Tensor conversion, then add the batch axis the model expects.
    img_t = preprocess_SAM(img_rgb).unsqueeze(0)

    # The model may have been moved to the GPU when it was loaded; the input
    # has to live on the same device or the forward pass raises.
    img_t = img_t.to(next(model.parameters()).device)

    with torch.no_grad():
        image_embeddings = model.get_image_embeddings(img_t)

        # Global average pooling over the spatial axes.
        gap = nn.AdaptiveAvgPool2d((1, 1))
        pooled_features = gap(image_embeddings)

    # .cpu() is required before .numpy() when the result is on the GPU and
    # is a no-op otherwise.
    return np.squeeze(pooled_features.detach().cpu().numpy())
    

In [None]:

# Extract SAM features for every slice stack, saving one .npy file per stack.

start_time = time.time()

sam_out_dir = '11_12_Digital_Features'
# np.save does not create missing directories; create the target up front so
# a long extraction run cannot fail at the very end.
os.makedirs(sam_out_dir, exist_ok=True)

sam_progress_every = 100  # print a progress line every this many images

features_by_plane = {}
for plane, slices in (('xx', xx_bin), ('xy', xy_bin), ('xz', xz_bin)):
    plane_features = []
    # The loop variable is deliberately not called `x`, so the dataset array
    # `x` from the loading cell is not overwritten.
    for count, img_slice in enumerate(slices):
        plane_features.append(make_sam_features(img_slice))
        if count % sam_progress_every == 0:
            print(plane, count)

    features_by_plane[plane] = np.array(plane_features)
    new_filename = os.path.join(sam_out_dir, plane + '_sam.npy')
    np.save(new_filename, features_by_plane[plane])

# Keep the names earlier versions of this cell left behind.
xx_features = features_by_plane['xx']
xy_features = features_by_plane['xy']
xz_features = features_by_plane['xz']

print('Done with sam')
print("--- %s seconds ---" % (time.time() - start_time))