In [1]:
import argparse
import os
import torch
import numpy as np

from utils.config import create_config
from utils.common_config import get_criterion, get_model, get_train_dataset,\
                                get_val_dataset, get_train_dataloader,\
                                get_val_dataloader, get_train_transformations,\
                                get_val_transformations, get_optimizer,\
                                adjust_learning_rate
from utils.evaluate_utils import contrastive_evaluate
from utils.memory import MemoryBank
from utils.train_utils import pretraining
from utils.utils import fill_memory_bank
from termcolor import colored
from matplotlib.axes._axes import _log as matplotlib_axes_logger
matplotlib_axes_logger.setLevel('ERROR')
from sklearn.cluster import KMeans
import sklearn

In [2]:
# Attribute names, kept in their original order (40 entries).
# NOTE(review): these look like the CelebA attribute labels and the list is not
# read in the cells visible here -- confirm where it is consumed.
attrs_default = [
    "5_o_Clock_Shadow", "Arched_Eyebrows", "Attractive", "Bags_Under_Eyes",
    "Bald", "Bangs", "Big_Lips", "Big_Nose",
    "Black_Hair", "Blond_Hair", "Blurry", "Brown_Hair",
    "Bushy_Eyebrows", "Chubby", "Double_Chin", "Eyeglasses",
    "Goatee", "Gray_Hair", "Heavy_Makeup", "High_Cheekbones",
    "Male", "Mouth_Slightly_Open", "Mustache", "Narrow_Eyes",
    "No_Beard", "Oval_Face", "Pale_Skin", "Pointy_Nose",
    "Receding_Hairline", "Rosy_Cheeks", "Sideburns", "Smiling",
    "Straight_Hair", "Wavy_Hair", "Wearing_Earrings", "Wearing_Hat",
    "Wearing_Lipstick", "Wearing_Necklace", "Wearing_Necktie", "Young",
]

In [3]:
# Build the run configuration from the environment YAML and the pretext-task YAML.
# NOTE(review): the trailing positional arguments 128 and 1 mirror the batch_size
# and epochs values assigned below -- presumably the same settings; confirm
# against create_config's signature.
p = create_config("configs/env.yml", "configs/pretext/pretraining.yml", 128, 1)
# Pin batch size and epoch count explicitly on the config object.
p['batch_size'] = 128
p['epochs'] = 1

In [4]:
import torchvision
import torchvision.transforms as transforms

# Root directory of the YFCC image folder. It can be overridden with the
# YFCC_ROOT environment variable so the notebook is not tied to a single
# machine; when the variable is unset the original location is used.
yfcc_root = os.environ.get('YFCC_ROOT', '/home/mehmetyavuz/datasets/YFCC392K/')

# ImageFolder over the YFCC images with a fixed preprocessing pipeline:
# centre crop to the configured crop size, tensor conversion, then
# normalisation with the configured statistics. No Resize step is applied.
# NOTE(review): `dataset` is not referenced by any other cell visible here.
dataset = torchvision.datasets.ImageFolder(
    root=yfcc_root,
    transform=transforms.Compose([
        transforms.CenterCrop(p['augmentation_kwargs']['crop_size']),
        transforms.ToTensor(),
        transforms.Normalize(**p['augmentation_kwargs']['normalize']),
    ]),
)

In [5]:
# Turn on PyTorch's cuDNN benchmark flag and announce it in blue.
print(colored('Set CuDNN benchmark', 'blue')) 
torch.backends.cudnn.benchmark = True

[34mSet CuDNN benchmark[0m


In [6]:
# Transformations returned by get_val_transformations for this config; they are
# printed so the exact pipeline is recorded in the cell output.
val_transforms = get_val_transformations(p)
print('Val transforms:', val_transforms)
print(colored('Build MemoryBank', 'blue'))
# The *training* dataset is built with the validation transforms and wrapped
# by the validation dataloader helper.
# NOTE(review): presumably this gives a deterministic, unshuffled pass for
# feature extraction -- confirm against get_val_dataloader.
base_dataset = get_train_dataset(p, val_transforms, to_augmented_dataset=True)
base_dataloader = get_val_dataloader(p, base_dataset)
# Memory bank sized to the number of samples in base_dataset; the remaining
# arguments (feature dim, number of heads, number of classes, temperature)
# are read straight from the config.
memory_bank_base = MemoryBank(len(base_dataset), 
                            p['model_kwargs']['features_dim'],
                            p['model_kwargs']['num_heads'], p['num_classes'], p['criterion_kwargs']['temperature'])

Val transforms: {'standard': Compose(
    CenterCrop(size=(128, 128))
    ToTensor()
    Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
)}
[34mBuild MemoryBank[0m


In [7]:
def get_cmap(n, name='hsv'):
    """Return the Matplotlib colormap ``name`` with ``n`` lookup-table entries.

    Args:
        n: Number of entries requested for the colormap's lookup table.
        name: Name of a registered Matplotlib colormap (default ``'hsv'``).

    Returns:
        The colormap object produced by ``matplotlib.pyplot.get_cmap``.
    """
    # Imported locally so the helper works even when it is called before the
    # cell that imports pyplot at notebook level has run.
    import matplotlib.pyplot as plt

    # `plt.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9;
    # `plt.get_cmap` accepts the same (name, lut) arguments and is still supported.
    return plt.get_cmap(name, n)

In [8]:
import glob
# Path of the checkpoint whose 'model' entry is loaded into the network in a
# later cell.
# NOTE(review): `glob` is not used in any cell visible here, and `files` holds
# a single path despite the plural name.
files = 'results001/CelebA/SimCLR-B128/finetuning_model.pth.tar'

In [9]:
import numpy as np
import matplotlib.pyplot as plt

from matplotlib.ticker import NullFormatter
from sklearn import manifold, datasets
from time import time

# Settings that are not read again in the cells visible here.
# NOTE(review): the names (together with the sklearn.manifold import) suggest a
# 2-D t-SNE embedding of the features -- confirm where they are consumed.
n_samples = len(base_dataset)
n_components = 2
perplexity = 15

# 10-entry colormap built by the notebook's get_cmap helper (default 'hsv').
cmap = get_cmap(10)

In [10]:
# Build the network, wrap it for multi-GPU use, and restore the saved weights.
print(colored('Retrieve model', 'blue'))
model = torch.nn.DataParallel(get_model(p))

# The checkpoint is deserialised on the CPU first and moved to the GPU with the
# model afterwards. torch.load unpickles the file, so only load checkpoints
# from a trusted source.
checkpoint = torch.load(files, map_location='cpu')
# The state dict is loaded into the DataParallel wrapper with strict key matching.
model.load_state_dict(checkpoint['model'], strict=True)
del checkpoint  # drop the CPU copy of the weights once they are in the model

print('Model is {}'.format(model.__class__.__name__))
# The generator variable is named `param` so it does not read like the config `p`.
n_params = sum(param.numel() for param in model.parameters())
print('Model parameters: {:.2f}M'.format(n_params / 1e6))
model = model.cuda()

# Run the base dataloader through the model to populate the base memory bank.
print('Fill memory bank for kNN...')
fill_memory_bank(base_dataloader, model, memory_bank_base)

[34mRetrieve model[0m
Model is DataParallel
Model parameters: 78.64M
Fill memory bank for kNN...


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Fill Memory Bank [0/13]


In [11]:
# Pull the base memory bank's features and targets back to host memory as
# NumPy arrays, dropping any singleton dimensions.
X = memory_bank_base.features.cpu().numpy().squeeze()
y = memory_bank_base.targets.cpu().numpy().squeeze()

In [12]:
# Rebuild the transforms with dictionary=False. This rebinds `val_transforms`,
# replacing the value that was used for the base dataset.
# NOTE(review): presumably dictionary=False returns the bare transform pipeline
# instead of a {'standard': ...} dict -- confirm against get_val_transformations.
val_transforms = get_val_transformations(p, dictionary=False)
print(colored('Build MemoryBank', 'blue'))
# Same dataset helper as for the base data, here called with YFCC=True and
# to_augmented_dataset=False, then wrapped by the validation dataloader helper.
YFCC_dataset = get_train_dataset(p, val_transforms, to_augmented_dataset=False, YFCC=True)
YFCC_dataloader = get_val_dataloader(p, YFCC_dataset)
# Memory bank sized to the number of samples in YFCC_dataset; the remaining
# arguments are the same config entries used for the base memory bank.
memory_bank_YFCC = MemoryBank(len(YFCC_dataset), 
                            p['model_kwargs']['features_dim'],
                            p['model_kwargs']['num_heads'], p['num_classes'], p['criterion_kwargs']['temperature'])

[34mBuild MemoryBank[0m


In [13]:
# Populate the YFCC memory bank with the same model, passing isYFCC=True.
# NOTE(review): presumably the flag tells fill_memory_bank that this dataset
# has no usable targets -- confirm against utils.utils.fill_memory_bank.
print('Fill memory bank for kNN...')
fill_memory_bank(YFCC_dataloader, model, memory_bank_YFCC, isYFCC=True)

Fill memory bank for kNN...
Fill Memory Bank [0/3065]
Fill Memory Bank [100/3065]
Fill Memory Bank [200/3065]
Fill Memory Bank [300/3065]
Fill Memory Bank [400/3065]
Fill Memory Bank [500/3065]
Fill Memory Bank [600/3065]
Fill Memory Bank [700/3065]
Fill Memory Bank [800/3065]
Fill Memory Bank [900/3065]
Fill Memory Bank [1000/3065]
Fill Memory Bank [1100/3065]
Fill Memory Bank [1200/3065]
Fill Memory Bank [1300/3065]
Fill Memory Bank [1400/3065]
Fill Memory Bank [1500/3065]
Fill Memory Bank [1600/3065]
Fill Memory Bank [1700/3065]
Fill Memory Bank [1800/3065]
Fill Memory Bank [1900/3065]
Fill Memory Bank [2000/3065]
Fill Memory Bank [2100/3065]
Fill Memory Bank [2200/3065]
Fill Memory Bank [2300/3065]
Fill Memory Bank [2400/3065]
Fill Memory Bank [2500/3065]
Fill Memory Bank [2600/3065]
Fill Memory Bank [2700/3065]
Fill Memory Bank [2800/3065]
Fill Memory Bank [2900/3065]
Fill Memory Bank [3000/3065]


In [14]:
# YFCC memory-bank features as a host-side NumPy array with singleton
# dimensions removed.
yfcc_features = memory_bank_YFCC.features.cpu().numpy()
X_YFCC = yfcc_features.squeeze()

In [15]:
# Persist the YFCC features; np.save opens the given path for binary writing itself.
np.save('X_YFCC.npy', X_YFCC)

In [16]:
# Persist the base targets; np.save opens the given path for binary writing itself.
np.save('y.npy', y)

In [17]:
# Persist the base features; np.save opens the given path for binary writing itself.
np.save('X.npy', X)