In [1]:
import numpy as np
import os
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data.dataloader import DataLoader
import matplotlib.pyplot as plt
from torchvision.datasets import CIFAR10, DTD
import timm
import detectors

from torch.nn.functional import softmax

In [2]:
########### Image preprocessing modules ###########
# Three torchvision pipelines are defined here: an augmented one for the
# CIFAR-10 training split, a plain one for the CIFAR-10 test split, and an
# augmented one for DTD.
# NOTE(review): each pipeline normalizes with a different mean/std set, and the
# two augmented pipelines are random (crop + flip) -- confirm which statistics
# the pretrained checkpoint expects and whether augmentation is wanted when
# these pipelines are used for feature extraction.


def _augmented_pipeline(mean, std):
    """Random 32x32 crop (4px padding), random horizontal flip, ToTensor, Normalize(mean, std)."""
    steps = [
        transforms.RandomCrop(size=32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
    return transforms.Compose(steps)


# Statistics inherited from https://github.com/kaidic/LDAM-DRW/blob/master/cifar_train.py
transform_train = _augmented_pipeline(
    mean=[0.5070751592371323, 0.48654887331495095, 0.4409178433670343],
    std=[0.2673342858792401, 0.2564384629170883, 0.27615047132568404],
)

# Test images are only converted to tensors and normalized (no augmentation).
test_steps = [
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
]
transform_test = transforms.Compose(test_steps)

# Statistics inherited from
# https://github.com/OliverXUZY/FM_weights/blob/124360fdfb2027319993a2be48caf7cb7a0887aa/src/dataset.py#L56
transform_DTD = _augmented_pipeline(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

In [3]:
########### dataset loading ###########
from collections import Counter, OrderedDict

# All datasets are read from a local copy (download=False), so the files must
# already be present under this root.
data_root = 'data/'

# CIFAR-10 dataset: train split with the augmented pipeline, test split with the plain one.
train_dataset = CIFAR10(root=data_root, train=True, transform=transform_train, download=False)
test_dataset = CIFAR10(root=data_root, train=False, transform=transform_test, download=False)

# DTD dataset (constructed with torchvision's default split arguments).
DTD_dataset = DTD(root=data_root, transform=transform_DTD, download=False)

# Sanity check: per-class sample counts and number of distinct classes,
# printed for the train split first and then the test split.
for split in (train_dataset, test_dataset):
    class_counts = dict(Counter(split.targets))
    print(class_counts)
    print(len(class_counts))

{6: 5000, 9: 5000, 4: 5000, 1: 5000, 2: 5000, 7: 5000, 8: 5000, 3: 5000, 5: 5000, 0: 5000}
10
{3: 1000, 8: 1000, 0: 1000, 6: 1000, 1: 1000, 9: 1000, 5: 1000, 7: 1000, 4: 1000, 2: 1000}
10


In [4]:
########### Pretrained model ###########
# Checkpoint: https://huggingface.co/edadaltocg/resnet18_cifar10
model = timm.create_model("resnet18_cifar10", pretrained=True)

# Feature extractor: the same child modules in the same order, minus the last
# one (the final fully-connected layer), so its output is the 512-dimensional
# input of that layer. The child modules are shared with `model`, not copied.
backbone_layers = list(model.named_children())[:-1]
new_model = torch.nn.Sequential(OrderedDict(backbone_layers))

In [5]:
########### training dataset features, probabilities, and labels ##########
# Runs the pretrained classifier (`model`) and its headless copy (`new_model`)
# over every image of `train_dataset` and saves three arrays under ./train/:
#   features (N, 512) -- output of `new_model`
#   probs    (N, 10)  -- softmax of the logits of `model`
#   labels   (N, 1)   -- class indices, stored as float32
# NOTE(review): `train_dataset` uses a random crop/flip transform, so the saved
# arrays change from run to run unless the RNG is seeded -- confirm intended.

out_dir = './train'
batch_size = 256

# Create the output directory before the long extraction loop; otherwise
# np.save raises FileNotFoundError only after all the work has been done.
os.makedirs(out_dir, exist_ok=True)

model.eval()
new_model.eval()

n_samples = len(train_dataset)
logits_mat = torch.empty((n_samples, 10))
features_mat = torch.empty((n_samples, 512))
labels_mat = torch.empty((n_samples, 1))

# shuffle=False keeps row i of every matrix aligned with train_dataset[i].
loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

with torch.no_grad():
    start = 0
    for x, y in loader:
        stop = start + x.shape[0]  # the last batch may be smaller than batch_size
        logits_mat[start:stop] = model(x)
        features_mat[start:stop] = new_model(x)
        labels_mat[start:stop, 0] = y
        start = stop

probs_mat = softmax(logits_mat, dim=1)
features, probs, labels = features_mat.numpy(), probs_mat.numpy(), labels_mat.numpy()

np.save('./train/CIFAR10_ResNet18_ce_pretrain_features_train.npy', features)
np.save('./train/CIFAR10_ResNet18_ce_pretrain_probs_train.npy', probs)
np.save('./train/CIFAR10_ResNet18_ce_pretrain_labels_train.npy', labels)

In [5]:
########### test dataset features, probabilities, and labels ##########
# Runs the pretrained classifier (`model`) and its headless copy (`new_model`)
# over every image of `test_dataset` and saves three arrays under ./test/:
#   features (N, 512) -- output of `new_model`
#   probs    (N, 10)  -- softmax of the logits of `model`
#   labels   (N, 1)   -- class indices, stored as float32

out_dir = './test'
batch_size = 256

# Create the output directory before the extraction loop; otherwise np.save
# raises FileNotFoundError only after all the work has been done.
os.makedirs(out_dir, exist_ok=True)

model.eval()
new_model.eval()

n_samples = len(test_dataset)
logits_mat = torch.empty((n_samples, 10))
features_mat = torch.empty((n_samples, 512))
labels_mat = torch.empty((n_samples, 1))

# shuffle=False keeps row i of every matrix aligned with test_dataset[i].
loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

with torch.no_grad():
    start = 0
    for x, y in loader:
        stop = start + x.shape[0]  # the last batch may be smaller than batch_size
        logits_mat[start:stop] = model(x)
        features_mat[start:stop] = new_model(x)
        labels_mat[start:stop, 0] = y
        start = stop

probs_mat = softmax(logits_mat, dim=1)
features, probs, labels = features_mat.numpy(), probs_mat.numpy(), labels_mat.numpy()

np.save('./test/CIFAR10_ResNet18_ce_pretrain_features_test.npy', features)
np.save('./test/CIFAR10_ResNet18_ce_pretrain_probs_test.npy', probs)
np.save('./test/CIFAR10_ResNet18_ce_pretrain_labels_test.npy', labels)

In [12]:
########### DTD dataset features, probabilities, and labels ##########
# Runs the CIFAR-10 classifier (`model`) and its headless copy (`new_model`)
# over every image of `DTD_dataset` and saves three arrays under ./DTD/:
#   features (N, 512) -- output of `new_model`
#   probs    (N, 10)  -- softmax of the logits of `model` (10 outputs of the
#                        CIFAR-10 head, not DTD classes)
#   labels   (N, 1)   -- DTD class indices as returned by the dataset, float32
# NOTE(review): `DTD_dataset` uses a random 32x32 crop/flip transform, so the
# saved arrays change from run to run unless the RNG is seeded -- confirm intended.

out_dir = './DTD'
batch_size = 256

# Create the output directory before the extraction loop; otherwise np.save
# raises FileNotFoundError only after all the work has been done.
os.makedirs(out_dir, exist_ok=True)

model.eval()
new_model.eval()

n_samples = len(DTD_dataset)
logits_mat = torch.empty((n_samples, 10))
features_mat = torch.empty((n_samples, 512))
labels_mat = torch.empty((n_samples, 1))

# shuffle=False keeps row i of every matrix aligned with DTD_dataset[i].
# Batching relies on the transform's fixed-size crop giving equal image shapes.
loader = DataLoader(DTD_dataset, batch_size=batch_size, shuffle=False)

with torch.no_grad():
    start = 0
    for x, y in loader:
        stop = start + x.shape[0]  # the last batch may be smaller than batch_size
        logits_mat[start:stop] = model(x)
        features_mat[start:stop] = new_model(x)
        labels_mat[start:stop, 0] = y
        start = stop

probs_mat = softmax(logits_mat, dim=1)
features, probs, labels = features_mat.numpy(), probs_mat.numpy(), labels_mat.numpy()

np.save('./DTD/CIFAR10_ResNet18_ce_pretrain_features_DTD.npy', features)
np.save('./DTD/CIFAR10_ResNet18_ce_pretrain_probs_DTD.npy', probs)
np.save('./DTD/CIFAR10_ResNet18_ce_pretrain_labels_DTD.npy', labels)