In [1]:
import os
import scipy.io
import numpy as np

# Root directory of the SUN Attribute dataset on the cluster scratch space.
sun_ds_dir = '/ibex/scratch/skoroki/datasets/SUN'

# Load both MATLAB files in one pass; the tuple order matches the unpacking order.
attrs_mat, images_mat = (
    scipy.io.loadmat(mat_path)
    for mat_path in (
        f'{sun_ds_dir}/SUNAttributeDB/attributeLabels_continuous.mat',
        f'{sun_ds_dir}/SUNAttributeDB/images.mat',
    )
)
# Not loaded (kept for reference):
# attrs = scipy.io.loadmat(f'{sun_ds_dir}/SUNAttributeDB/attributes.mat')

In [2]:
# Per-image attribute rows ('labels_cv') and the image paths from the loaded .mat files.
attrs = attrs_mat['labels_cv']
images = [im[0][0] for im in images_mat['images']]
# Attribute rows are selected by image position below, so both must have the same length.
assert len(attrs) == len(images), \
    f'attribute rows ({len(attrs)}) and image paths ({len(images)}) are misaligned'

# The class of an image is the directory part of its path.
class_names = [os.path.dirname(img) for img in images]
unique_class_names = sorted(set(class_names))
n_classes = len(unique_class_names)

# Dict lookup instead of list.index(): index() rescans the class list for every
# image, which is quadratic in practice; the resulting labels are identical.
class_to_idx = {name: i for i, name in enumerate(unique_class_names)}
labels = np.array([class_to_idx[c] for c in class_names])

# Row positions of the images belonging to each class, then the mean attribute
# vector over those rows (one row per class, ordered like unique_class_names).
class_idx = [np.where(labels == c)[0] for c in range(n_classes)]
class_attrs = np.array([attrs[idx].mean(axis=0) for idx in class_idx])

In [3]:
# Scale every class attribute vector (row) to unit Euclidean length.
# keepdims=True keeps the norms as a column so the division broadcasts per row.
row_norms = np.linalg.norm(class_attrs, axis=1, keepdims=True)
class_attrs = np.divide(class_attrs, row_norms)

In [4]:
# Persist the class attribute matrix, the image path list and the per-image labels.
# np.save appends the '.npy' extension to each target path.
for out_path, payload in (
    ('/ibex/scratch/skoroki/datasets/SUN/attributes', class_attrs),
    ('/ibex/scratch/skoroki/datasets/SUN/image_files', images),
    ('/ibex/scratch/skoroki/datasets/SUN/labels', labels),
):
    np.save(out_path, payload)

In [7]:
# Location of the SUN397 dataset and its train/test partition lists.
big_sun_ds_dir = '/ibex/scratch/skoroki/datasets/SUN397'
partitions_dir = os.path.join(big_sun_ds_dir, 'partitions')

# List the directory once, then split the entries by their filename prefix.
partition_names = os.listdir(partitions_dir)
train_partitions_files = [
    os.path.join(partitions_dir, name)
    for name in partition_names
    if name.startswith('Training')
]
test_partitions_files = [
    os.path.join(partitions_dir, name)
    for name in partition_names
    if name.startswith('Testing')
]

In [12]:
def _read_lines(path):
    """Return the lines of the text file at `path`, closing the file afterwards."""
    # The context manager closes the handle; a bare open(path).read() leaves
    # one open file per partition until garbage collection.
    with open(path) as fh:
        return fh.read().splitlines()


# Flatten all partition files into one list of image entries per split.
train_imgs = [img for f in train_partitions_files for img in _read_lines(f)]
test_imgs = [img for f in test_partitions_files for img in _read_lines(f)]

In [23]:
# Every image entry that appears in any SUN397 training or testing partition.
big_sun_imgs = set(train_imgs).union(test_imgs)
# Keep the attribute-dataset images that are not listed there. A '/' is prepended
# before the lookup, presumably because partition entries start with '/'.
new_imgs = [img_path for img_path in images if f'/{img_path}' not in big_sun_imgs]

In [26]:
# Number of attribute-dataset images whose '/'-prefixed path is absent from
# every SUN397 Training*/Testing* partition list.
len(new_imgs)

6872

In [35]:
# Fixed-size training split; whatever remains is divided between validation and test.
train_size = 10000
n_images = len(images)
# Fail early: with too few images the slices below would silently produce
# empty validation/test splits instead of raising.
assert train_size <= n_images, \
    f'train_size ({train_size}) exceeds the number of images ({n_images})'

val_size = (n_images - train_size) // 2
# The test split takes everything left after train and val, so it is one image
# larger than val when the remainder is odd; test_size reflects that exactly.
test_size = n_images - train_size - val_size

# Seeded permutation so the split is reproducible across runs.
shuffling = np.random.RandomState(42).permutation(n_images)
train_idx = shuffling[:train_size]
val_idx = shuffling[train_size:train_size + val_size]
test_idx = shuffling[train_size + val_size:]

# The three splits must partition the image indices with the advertised sizes.
assert (len(train_idx), len(val_idx), len(test_idx)) == (train_size, val_size, test_size)

In [37]:
# Persist the train/val/test index arrays next to the other dataset files.
# np.save appends the '.npy' extension to each target path.
for split_path, split_indices in (
    ('/ibex/scratch/skoroki/datasets/SUN/train_idx', train_idx),
    ('/ibex/scratch/skoroki/datasets/SUN/val_idx', val_idx),
    ('/ibex/scratch/skoroki/datasets/SUN/test_idx', test_idx),
):
    np.save(split_path, split_indices)