In [1]:
# Imports for the notebook. Standard-library and third-party modules are
# imported unconditionally; only the project modules need reload handling.
import importlib
import os
import os.path as osp
from pathlib import Path

import numpy as np
import torch

# Decide *before* importing whether this cell has already been run in this
# kernel. The previous try/except NameError approach also caught NameErrors
# raised from inside a module while it was being reloaded, which hid real bugs
# and silently fell back to the stale module.
_project_modules_loaded = all(
    name in globals() for name in ("dp", "Config", "runner")
)

import data_load.data_provider as dp
import config.config_flags as Config
import runner

if _project_modules_loaded:
    # Re-run of the cell: pick up source edits. Reload order matches the
    # original cell (dp, Config, runner).
    for _module in (dp, Config, runner):
        importlib.reload(_module)

In [40]:
# Load the embeddings pickle through the project's DataProvider.
# Loads tieredImageNet embeddings by default. Set a different dataset in config_flags.py.

# Pick the split from 'train', 'val' or 'test'.
# The debug param keeps a copy of the pkl embeddings data.
dataset_type_pkl = 'val'  # the original pkl dataset type (split) to be loaded
dataProvider = dp.DataProvider(dataset_type_pkl, debug=True, verbose=True)

# Raw embeddings data; later cells read its 'labels', 'embeddings' and 'keys' entries.
embeddings_data = dataProvider.get_embeddings_data()

Path fetched: ..\embeddings\tieredImageNet\center\val_embeddings.pkl


In [41]:
#Get first num elements in the dictionary - helper method
def head(dict_obj, num=5):
    """Print the first ``num`` key/value pairs of ``dict_obj``.

    Args:
        dict_obj: Mapping whose items are printed in iteration order.
        num: Maximum number of pairs to print (default 5). Zero or a negative
            value prints nothing; ``None`` prints every pair.

    Returns:
        None. One ``Key: <key> value: <value>`` line is written to stdout
        per printed pair.
    """
    from itertools import islice  # local import keeps the helper self-contained

    # The old counter loop only stopped when count == num, so num <= 0 printed
    # the whole dictionary. islice enforces the bound directly; max() guards
    # against negative stops, which islice rejects.
    limit = None if num is None else max(num, 0)
    for key, value in islice(dict_obj.items(), limit):
        print("Key: " + str(key), "value: " + str(value))

In [42]:
# Each element of "keys" has the form <id>-<classLabel>-<classLabel>_<imageNumber>.JPEG,
# e.g. '1072646529445394375-n02099601-n02099601_2439.JPEG'.
# List of class labels in miniImageNet: https://gist.github.com/kaixin96/ffb88bd025fc05deb2d7f1378e9b7282

# Print the first three top-level entries of the raw embeddings dictionary.
head(embeddings_data, 3)

Key: labels value: [0 0 0 ... 0 0 0]
Key: embeddings value: [[3.2385639e-03 1.8942569e-04 1.3159506e-02 ... 5.6467261e-03
  2.8360044e-04 5.4967026e-03]
 [1.0305644e-06 5.7039782e-05 6.0153725e-03 ... 7.2171907e-03
  7.8330771e-04 2.3086595e-03]
 [3.9626868e-04 2.3760945e-03 3.5099715e-03 ... 1.6851516e-03
  2.4224888e-03 6.0567213e-04]
 ...
 [5.0420989e-04 1.1343597e-03 7.8602228e-03 ... 4.0134267e-04
  1.4575045e-03 3.5321803e-04]
 [0.0000000e+00 1.9685794e-03 1.7314308e-03 ... 1.8279791e-03
  1.4138945e-02 5.3597195e-04]
 [4.2886622e-04 2.1248795e-04 5.9927190e-03 ... 4.9523721e-03
  8.7791802e-03 7.8218954e-04]]
Key: keys value: ['1072646529445394375-n02099601-n02099601_2439.JPEG'
 '1113032556112010943-n02102480-n02102480_7854.JPEG'
 '1120287575005342714-n03496892-n03496892_17606.JPEG' ...
 '554146050667129952-n04067472-n04067472_14426.JPEG'
 '574482493408202056-n02930766-n02930766_16758.JPEG'
 '585246420666931655-n03930630-n03930630_1767.JPEG']


In [43]:
# NOTE: the 'labels' entry of the dictionary does not represent class labels.
# The real class label is embedded in the filename of each embedding (see the
# "keys" entries printed in the cell above).

# Pull the three top-level entries out as numpy arrays in one go.
labels, embedding_values, keys = (
    np.array(embeddings_data[field]) for field in ('labels', 'embeddings', 'keys')
)

# Summarise the shapes (and the distinct values stored under 'labels').
print("===pkl embeddings file info===")
print("labels shape:", labels.shape, "Unique elements: ", np.unique(labels))
print("embedding values shape: ", embedding_values.shape)
print("keys shape:", keys.shape)

===pkl embeddings file info===
labels shape: (124000,) Unique elements:  [0]
embedding values shape:  (124000, 640)
keys shape: (124000,)


In [44]:
# The raw embeddings data (pkl) is indexed/organized into two dictionaries:
#   class_image_file_dict       - class label    -> image filenames
#   image_file_embeddings_dict  - image filename -> embeddings data
# Indexing helps us construct our n-way k-shot problems.
class_image_file_dict, image_file_embeddings_dict = dataProvider.get_indexed_data()
# Report the number of entries in each index.
print("class label to image filenames dictionary:", len(class_image_file_dict))
print("image filename to embeddings data dictionary", len(image_file_embeddings_dict))

class label to image filenames dictionary: 97
image filename to embeddings data dictionary 124000


In [45]:
# Show one entry of the class-label -> image-filenames index.
head(class_image_file_dict,1)

Key: n02099601 value: ['n02099601_2439.JPEG' 'n02099601_1654.JPEG' 'n02099601_6124.JPEG' ...
 'n02099601_2460.JPEG' 'n02099601_3411.JPEG' 'n02099601_12990.JPEG']


In [46]:
# Show one entry of the image-filename -> embedding index.
head(image_file_embeddings_dict,1)

Key: n02099601_2439.JPEG value: [3.23856389e-03 1.89425686e-04 1.31595060e-02 1.15267793e-03
 2.22466144e-04 4.02899343e-04 3.83161893e-03 0.00000000e+00
 7.68344710e-03 3.35025042e-02 1.18376885e-03 4.46909748e-04
 1.17685609e-02 0.00000000e+00 4.11629444e-04 2.45084683e-03
 3.62301944e-03 6.37231424e-05 8.03190633e-04 5.83129330e-03
 3.83506389e-03 4.93042928e-04 1.56671857e-03 1.67190167e-03
 9.08649701e-04 4.05392377e-04 7.74350483e-04 1.62679775e-04
 1.03507342e-03 3.15711647e-03 1.14170453e-02 1.16079105e-02
 2.23458093e-03 8.25890992e-03 4.96176617e-05 0.00000000e+00
 5.92266209e-04 0.00000000e+00 2.19947123e-03 1.53862906e-03
 2.45403801e-03 7.79392198e-03 5.06709237e-03 1.04401568e-02
 2.55171629e-03 0.00000000e+00 0.00000000e+00 3.33414832e-03
 4.36473219e-03 3.51644959e-03 0.00000000e+00 2.87072361e-03
 1.30566163e-03 6.66173524e-04 4.53151239e-04 5.39812667e-04
 6.82270504e-04 5.15033666e-04 3.33822286e-03 1.56134670e-03
 9.62726027e-03 2.58035073e-03 1.79423892e-04 1.23264

In [100]:
# Save embeddings data for validation (to be used in TASML).
# Note: if we want, we can extract training + val examples from the embeddings
# validation data (as done in this cell).

# All sizes come from config_flags.py so the saved file name reflects the settings used.
db_title = Config.EMBEDDINGS_DATASET_NAME
sample_size = Config.TRAIN_SAMPLE_SIZE #Kept small for demonstration. Authors use 30,000
tr_size = Config.TRAINING_NUM_OF_EXAMPLES_PER_CLASS
val_size = Config.VALIDATION_NUM_OF_EXAMPLES_PER_CLASS
num_classes = Config.NUM_OF_CLASSES
checkpoint_root = Config.CHECKPOINT_ROOT
# File name pattern: <split>_<dataset>_<sample_size>_<tr_size>_<val_size>
save_path = osp.join(checkpoint_root, "%s_%s_%i_%i_%i" % (dataset_type_pkl, db_title, sample_size, tr_size, val_size))

# Build the db and write it to save_path.
# NOTE(review): create_db prints array shapes repeatedly (see the cell output);
# presumably once per sampled task - consider clearing this output before sharing.
dataProvider.create_db(sample_size, num_classes, tr_size, val_size)
dataProvider.save_db(save_path)

embedding_array:  (5, 16, 640)
task_sig (640,)
label_array (5, 16, 1)
path_array (5, 16)
embedding_array:  (5, 16, 640)
task_sig (640,)
label_array (5, 16, 1)
path_array (5, 16)
embedding_array:  (5, 16, 640)
task_sig (640,)
label_array (5, 16, 1)
path_array (5, 16)
embedding_array:  (5, 16, 640)
task_sig (640,)
label_array (5, 16, 1)
path_array (5, 16)
embedding_array:  (5, 16, 640)
task_sig (640,)
label_array (5, 16, 1)
path_array (5, 16)
embedding_array:  (5, 16, 640)
task_sig (640,)
label_array (5, 16, 1)
path_array (5, 16)
embedding_array:  (5, 16, 640)
task_sig (640,)
label_array (5, 16, 1)
path_array (5, 16)
embedding_array:  (5, 16, 640)
task_sig (640,)
label_array (5, 16, 1)
path_array (5, 16)
embedding_array:  (5, 16, 640)
task_sig (640,)
label_array (5, 16, 1)
path_array (5, 16)
embedding_array:  (5, 16, 640)
task_sig (640,)
label_array (5, 16, 1)
path_array (5, 16)
embedding_array:  (5, 16, 640)
task_sig (640,)
label_array (5, 16, 1)
path_array (5, 16)
embedding_array:  (5,

In [101]:
# task_sig is a class-wise mean followed by an example-wise mean of the normalized data.
# NOTE(review): the bare `db` below renders the entire structure (the output starts
# with a list of tuples of arrays); consider displaying only a summary or a small
# slice to keep the notebook readable.
db = dataProvider.db 
db

[(array([2.80899409e-03, 2.38331158e-03, 2.52066985e-02, 1.57642298e-02,
         1.25760999e-02, 3.52510815e-03, 8.16040662e-03, 6.68201302e-03,
         7.32241587e-03, 1.85373920e-02, 1.00841566e-02, 3.31096431e-04,
         1.96523857e-02, 3.96429678e-03, 1.70445294e-03, 1.98816391e-02,
         9.74992870e-03, 6.10679008e-03, 1.00305936e-02, 2.39373371e-02,
         1.22514379e-02, 4.42732138e-03, 6.47557817e-03, 5.59985167e-03,
         2.53053132e-03, 2.85733341e-03, 8.54702646e-03, 2.34053581e-03,
         1.16750339e-03, 2.24219735e-02, 4.04028139e-03, 2.24218632e-02,
         9.47043836e-03, 2.72293075e-03, 1.86426677e-03, 2.49086068e-03,
         5.04688682e-03, 1.20139770e-03, 1.04596247e-02, 2.80653856e-02,
         5.21230894e-03, 1.55133543e-02, 1.14510506e-02, 2.81946083e-03,
         1.62437044e-02, 6.25189919e-03, 3.36237781e-04, 9.23604546e-03,
         7.80715656e-03, 1.35374265e-02, 3.54892421e-03, 1.37474215e-02,
         3.08073392e-03, 2.24344907e-03, 6.05376363

In [1]:
# Imports for the notebook. Standard-library and third-party modules are
# imported unconditionally; only the project modules need reload handling.
import importlib
import os
import os.path as osp
from pathlib import Path

import numpy as np
import torch

# Decide *before* importing whether this cell has already been run in this
# kernel. The previous try/except NameError approach also caught NameErrors
# raised from inside a module while it was being reloaded, which hid real bugs
# and silently fell back to the stale module.
_project_modules_loaded = all(
    name in globals() for name in ("dp", "Config", "runner")
)

import data_load.data_provider as dp
import config.config_flags as Config
import runner

if _project_modules_loaded:
    # Re-run of the cell: pick up source edits. Reload order matches the
    # original cell (dp, Config, runner).
    for _module in (dp, Config, runner):
        importlib.reload(_module)

In [23]:
# Load the pickled test and train task databases from the checkpoint directory.
# The paths are built with osp.join instead of hard-coded "\\" separators so
# the cell also works on non-Windows systems (on Windows the resulting strings
# are unchanged).
# NOTE(review): runner.unpickle presumably uses pickle - only load files you trust.
test_db = runner.unpickle(osp.join("checkpoint", "test_tieredImageNet_1_0_600"))
train_db = runner.unpickle(osp.join("checkpoint", "train_tieredImageNet_300_1_15"))

In [54]:
# populate_db() returns the path of a pickled file; unpickling it yields the
# alpha weights array, whose shape is printed below.
# NOTE(review): what populate_db computes is not visible from this notebook.
weights_path = runner.populate_db()
print(weights_path)
alpha_weights = runner.unpickle(weights_path)
print(alpha_weights.shape)

checkpoint/train_tieredImageNet_300_1_15-test_tieredImageNet_1_0_600
(300, 1)


In [2]:
# Keep only the top-m entries. Note that this rebinds `alpha_weights`, replacing
# the array unpickled in the earlier populate_db cell.
alpha_weights, train_tasks = runner.top_m_filtering() #m = 100 in config_flags

In [14]:
# Sanity-check the filtered result: weights shape, number of tasks kept, and the
# length of the first row of element 2 of the first task.
# NOTE(review): element 2 of a task is presumably the path array - confirm in runner.
print(alpha_weights.shape)
print(len(train_tasks))
print(len(train_tasks[0][2][0]))

(100, 1)
100
16


In [33]:
# Inspect element 1 of the first task: its shape and the distinct values it holds.
first_task_labels = train_tasks[0][1]
unique_classes = np.unique(first_task_labels)

print(first_task_labels.shape)
print(unique_classes)

# List each distinct value on its own line.
for class_id in unique_classes:
    print(class_id)

(5, 16, 1)
[0 1 2 3 4]
0
1
2
3
4


In [35]:
# Shape of the first row of element 2 of the first task.
print(train_tasks[0][2][0].shape)

(16,)


In [42]:
# First of the unique values computed from train_tasks[0][1] (kept as a length-1 array).
unique_classes[:1]

array([0])