In [2]:
import numpy as np
import os
import scipy.io
from sklearn.metrics import classification_report,confusion_matrix


In [3]:
#Please add the folder name of the dataset to run it on different dataset.
dataset = 'AWA2'

From the .mat files extract all the features from resnet and the attribute splits. 
- The res101 contains features and the corresponding labels.
- att_splits contains the different splits for trainval, train, val and test set.

In [4]:
res101 = scipy.io.loadmat('C:/Users/Zach/Documents/NorthEastern/SmallDataMachineLearning/Code/ESZSL/Data/xlsa17/data/'+dataset+'/res101.mat')
att_splits = scipy.io.loadmat('C:/Users/Zach/Documents/NorthEastern/SmallDataMachineLearning/Code/ESZSL/Data/xlsa17/data/'+dataset+'/att_splits.mat')

In [5]:
res101.keys()

dict_keys(['__header__', '__version__', '__globals__', 'features', 'image_files', 'labels'])

In [6]:
att_splits.keys()

dict_keys(['__header__', '__version__', '__globals__', 'allclasses_names', 'att', 'original_att', 'test_seen_loc', 'test_unseen_loc', 'train_loc', 'trainval_loc', 'val_loc'])

In [7]:
#Using the correct naming conventions to get the loctions
trainval_loc = 'trainval_loc'
train_loc = 'train_loc'
val_loc = 'val_loc'
test_loc = 'test_unseen_loc'

We need the corresponding ground-truth labels/classes for each training example for all our train, val, trainval and test set according to the split locations provided.
In this example we have used the `CUB` dataset which has 200 unique classes overall.

In [8]:
labels = res101['labels']
labels_train = labels[np.squeeze(att_splits[train_loc]-1)]
labels_val = labels[np.squeeze(att_splits[val_loc]-1)]
labels_trainval = labels[np.squeeze(att_splits[trainval_loc]-1)]
labels_test = labels[np.squeeze(att_splits[test_loc]-1)]

In [9]:
labels_train[:10,:]

array([[43],
       [22],
       [43],
       [38],
       [19],
       [18],
       [46],
       [19],
       [49],
       [37]], dtype=uint8)

In [10]:
unique_labels = np.unique(labels)
unique_labels

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50],
      dtype=uint8)

In a typical zero-shot learning scenario, there are no overlapping classes between training and testing phase, i.e the train classes are completely different from the test classes. So let us verify if there are any overlapping classes in the test and train scenario.
- During training phase we have `z` classes
- During the testing phase we have `z'` classes

In [11]:
train_labels_seen = np.unique(labels_train)
val_labels_unseen = np.unique(labels_val)
trainval_labels_seen = np.unique(labels_trainval)
test_labels_unseen = np.unique(labels_test)

In [12]:
print("Number of overlapping classes between train and val:",len(set(train_labels_seen).intersection(set(val_labels_unseen))))
print("Number of overlapping classes between trainval and test:",len(set(trainval_labels_seen).intersection(set(test_labels_unseen))))

Number of overlapping classes between train and val: 0
Number of overlapping classes between trainval and test: 0


In [13]:
i = 0
for labels in train_labels_seen:
    labels_train[labels_train == labels] = i    
    i = i+1
j = 0
for labels in val_labels_unseen:
    labels_val[labels_val == labels] = j
    j = j+1
k = 0
for labels in trainval_labels_seen:
    labels_trainval[labels_trainval == labels] = k
    k = k+1
l = 0
for labels in test_labels_unseen:
    labels_test[labels_test == labels] = l
    l = l+1

Let us denote the features X ∈ [d×m] available at training stage, where d is the dimensionality
of the data, and m is the number of instances. We are useing resnet features which are extracted from `CUB` dataset.

In [14]:
X_features = res101['features']
train_vec = X_features[:,np.squeeze(att_splits[train_loc]-1)]
val_vec = X_features[:,np.squeeze(att_splits[val_loc]-1)]
trainval_vec = X_features[:,np.squeeze(att_splits[trainval_loc]-1)]
test_vec = X_features[:,np.squeeze(att_splits[test_loc]-1)]

In [15]:
print("Features for train:", train_vec.shape)
print("Features for val:", val_vec.shape)
print("Features for trainval:", trainval_vec.shape)
print("Features for test:", test_vec.shape)

Features for train: (2048, 16187)
Features for val: (2048, 7340)
Features for trainval: (2048, 23527)
Features for test: (2048, 7913)


#### Normalize the vectors

In [16]:
def normalization(vec,mean,std):
    sol = vec - mean
    sol1 = sol/std
    return sol1

In [17]:
# train_mean = train_vec.mean(axis=1, keepdims=True)
# train_std = np.std(train_vec, axis=1, keepdims = True)
# trainval_mean = trainval_vec.mean(axis=1, keepdims = True)
# trainval_std = np.std(trainval_vec, axis=1, keepdims=True)

# train_vec = normalization(train_vec, train_mean, train_std)
# val_vec = normalization(val_vec, train_mean, train_std)

# trainval_vec = normalization(trainval_vec, trainval_mean, trainval_std)
# test_vec = normalization(test_vec, trainval_mean, trainval_std)

Each of the classes in the dataset have an attribute (a) description. This vector is known as the `Signature matrix` of dimension S ∈ [0, 1]a×z. For training stage there are z classes and z' classes  for test S ∈ [0, 1]a×z'.

In [18]:
#Signature matrix
signature = att_splits['att']
train_sig = signature[:,(train_labels_seen)-1]
val_sig = signature[:,(val_labels_unseen)-1]
trainval_sig = signature[:,(trainval_labels_seen)-1]
test_sig = signature[:,(test_labels_unseen)-1]

This is a signature matrix, where the occurance of an attribute corresponding to the class is give.
For instance, if the classes are `horse` and `zebra` and the corresponding attributes are [wild_animal, 4_legged, carnivore]

```
 Horse      Zebra
[0.00354613 0.        ] Domestic_animal
[0.13829921 0.20209503] 4_legged
[0.06560347 0.04155225] carnivore
```

In [19]:
print(train_sig[3:6,:2])

[[-0.00375358  0.22753174]
 [ 0.0463192   0.01150855]
 [ 0.          0.        ]]


In [20]:
print("Signature for train:", train_sig.shape)
print("Signature for val:", val_sig.shape)
print("Signature for trainval:", trainval_sig.shape)
print("Signature for test:", test_sig.shape)

Signature for train: (85, 27)
Signature for val: (85, 13)
Signature for trainval: (85, 40)
Signature for test: (85, 10)


In [21]:
#params for train and val set
m_train = labels_train.shape[0]
n_val = labels_val.shape[0]
z_train = len(train_labels_seen)
z1_val = len(val_labels_unseen)

#params for trainval and test set
m_trainval = labels_trainval.shape[0]
n_test = labels_test.shape[0]
z_trainval = len(trainval_labels_seen)
z1_test = len(test_labels_unseen)

The ground truth is a one-hot encoded vector

In [22]:
#ground truth for train and val set
gt_train = 0*np.ones((m_train, z_train))
gt_train[np.arange(m_train), np.squeeze(labels_train)] = 1

#grountruth for trainval and test set
gt_trainval = 0*np.ones((m_trainval, z_trainval))
gt_trainval[np.arange(m_trainval), np.squeeze(labels_trainval)] = 1

In [23]:
gt_train[:1,:100]

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]])

In [24]:
#train set
d_train = train_vec.shape[0]
a_train = train_sig.shape[0]

#Weights
V = np.zeros((d_train,a_train))

In [25]:
#trainval set
d_trainval = trainval_vec.shape[0]
a_trainval = trainval_sig.shape[0]
W = np.zeros((d_trainval,a_trainval))

#Note: These hyper-parameters were found using the code snippet available below
gamm1 = 3
alph1 = 0

print(d_trainval)

The one-line code solution proposed.
```
V = inverse(XX' + γI) XYS' inverse(SS' + λI)
```



In [26]:
part_1_test = np.linalg.pinv(np.matmul(trainval_vec, trainval_vec.transpose()) + (10**alph1)*np.eye(d_trainval))
part_0_test = np.matmul(np.matmul(trainval_vec,gt_trainval),trainval_sig.transpose())
part_2_test = np.linalg.pinv(np.matmul(trainval_sig, trainval_sig.transpose()) + (10**gamm1)*np.eye(a_trainval))

W = np.matmul(np.matmul(part_1_test,part_0_test),part_2_test)

For inference stage, 
```
argmax(x'VS)
```
Where S is the signature matrix of the test_set

In [27]:
#predictions
outputs_1 = np.matmul(np.matmul(test_vec.transpose(),W),test_sig)
preds_1 = np.array([np.argmax(output) for output in outputs_1])

In [28]:
cm = confusion_matrix(labels_test, preds_1)
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
avg = sum(cm.diagonal())/len(test_labels_unseen)
print("The top 1% accuracy is:", avg*100)

The top 1% accuracy is: 45.73846664538193


------------------------------------------------------------------------------------------------
The below code snippet can be used to find the best hyper-parameter using the train and val set.

In [29]:
accu = 0.10
alph1 = 4
gamm1 = 1
for alpha in range(-3, 4):
    for gamma in range(-3,4):
        #One line solution
        part_1 = np.linalg.pinv(np.matmul(train_vec, train_vec.transpose()) + (10**alpha)*np.eye(d_train))
        part_0 = np.matmul(np.matmul(train_vec,gt_train),train_sig.transpose())
        part_2 = np.linalg.pinv(np.matmul(train_sig, train_sig.transpose()) + (10**gamma)*np.eye(a_train))

        V = np.matmul(np.matmul(part_1,part_0),part_2)
        #print(V)

        #predictions
        outputs = np.matmul(np.matmul(val_vec.transpose(),V),val_sig)
        preds = np.array([np.argmax(output) for output in outputs])

        #print(accuracy_score(labels_val,preds))
        cm = confusion_matrix(labels_val, preds)
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        avg = sum(cm.diagonal())/len(val_labels_unseen)
        #print("Avg:", avg, alpha, gamma)

        if avg > accu:
            accu = avg
            alph1 = alpha
            gamm1 = gamma
print(alph1, gamm1)

3 0
