In [356]:
import numpy as np
import os
import scipy.io
from sklearn.metrics import classification_report,confusion_matrix

In [357]:
# Name of the dataset sub-folder to load; change it to run the notebook on a different dataset.
dataset = 'AWA1'

In [358]:
###############################################################################

# Data location. Set the ESZSL_DATA_ROOT environment variable to the folder that
# holds one sub-folder per dataset (each containing res101.mat and
# att_splits.mat). The original author's path is kept only as a fallback.

###############################################################################
DATA_ROOT = os.environ.get(
    'ESZSL_DATA_ROOT',
    'C:/Users/Zach/Documents/NorthEastern/SmallDataMachineLearning/Code/ESZSL/Data/xlsa17/data',
)
dataset_dir = os.path.join(DATA_ROOT, dataset)
# Image features and per-image class labels
res101 = scipy.io.loadmat(os.path.join(dataset_dir, 'res101.mat'))
# Class attribute matrix and the split location arrays
att_splits = scipy.io.loadmat(os.path.join(dataset_dir, 'att_splits.mat'))

In [359]:
# Inspect which variables the feature file provides
res101.keys()

dict_keys(['__header__', '__version__', '__globals__', 'image_files', 'features', 'labels'])

In [360]:
# Inspect which variables the attribute/split file provides
att_splits.keys()

dict_keys(['__header__', '__version__', '__globals__', 'allclasses_names', 'att', 'original_att', 'test_seen_loc', 'test_unseen_loc', 'train_loc', 'trainval_loc', 'val_loc'])

In [361]:
# Keys under which att_splits stores the location arrays of each split
trainval_loc, train_loc, val_loc = 'trainval_loc', 'train_loc', 'val_loc'
# The test split used in this notebook is the unseen-class one
test_loc = 'test_unseen_loc'

We need the ground-truth class label of every example in the train, val, trainval and test sets, selected according to the split locations provided.
In this example we use the `AWA1` dataset, which has 50 unique classes overall.

In [362]:
# Per-image class labels stored alongside the ResNet-101 (CNN) features
labels = res101['labels']
# Pick the labels of each split. The stored locations are shifted by -1 before
# indexing (presumably 1-based MATLAB indices -- confirm against the data).
# Order: train, validation, train+validation, test.
labels_train, labels_val, labels_trainval, labels_test = (
    labels[np.squeeze(att_splits[split_key] - 1)]
    for split_key in (train_loc, val_loc, trainval_loc, test_loc)
)



In [365]:
# List all unique class labels in the dataset (50 for AWA1)
unique_labels = np.unique(labels)
unique_labels

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50],
      dtype=uint8)

In a typical zero-shot learning scenario, there are no overlapping classes between the training and testing phases, i.e., the train classes are completely different from the test classes. So let us verify that there are no overlapping classes between the train and test splits.
- During training phase we have `z` classes
- During the testing phase we have `z'` classes

In [366]:
# Sorted distinct class ids found in each split (order: train, val, trainval, test)
(train_labels_seen,
 val_labels_unseen,
 trainval_labels_seen,
 test_labels_unseen) = map(np.unique, (labels_train, labels_val, labels_trainval, labels_test))

In [367]:
# Zero-shot sanity check: the class sets of paired splits should be disjoint
train_val_overlap = set(train_labels_seen) & set(val_labels_unseen)
trainval_test_overlap = set(trainval_labels_seen) & set(test_labels_unseen)
print("Number of overlapping classes between train and val:",len(train_val_overlap))
print("Number of overlapping classes between trainval and test:",len(trainval_test_overlap))

Number of overlapping classes between train and val: 0
Number of overlapping classes between trainval and test: 0


In [368]:
# Re-encode the raw class ids of every split as contiguous indices 0..z-1:
# index i stands for the i-th entry of that split's sorted unique-class array.
# Example: the largest class id present in the train split becomes
# len(train_labels_seen) - 1.
#
# The index arrays are rebuilt from the raw labels with np.searchsorted instead
# of being rewritten in place. This keeps the cell safe to re-run and avoids a
# loop variable that rebinds the global `labels` array to a scalar.
# The results are integer index arrays with the same shape as the raw labels.
raw_labels = res101['labels']
labels_train = np.searchsorted(
    train_labels_seen, raw_labels[np.squeeze(att_splits[train_loc]-1)])
labels_val = np.searchsorted(
    val_labels_unseen, raw_labels[np.squeeze(att_splits[val_loc]-1)])
labels_trainval = np.searchsorted(
    trainval_labels_seen, raw_labels[np.squeeze(att_splits[trainval_loc]-1)])
labels_test = np.searchsorted(
    test_labels_unseen, raw_labels[np.squeeze(att_splits[test_loc]-1)])

Let us denote the features $X \in \mathbb{R}^{d \times m}$ available at the training stage, where $d$ is the dimensionality
of the data and $m$ is the number of instances. We are using ResNet-101 features extracted from the `AWA1` dataset.

In [369]:
# Feature matrix of the whole dataset; images are selected along the second axis
X_features = res101['features']
# Column subsets for each split, in the order train, validation,
# train+validation, test (locations shifted by -1 before indexing)
train_vec, val_vec, trainval_vec, test_vec = (
    X_features[:, np.squeeze(att_splits[split_key] - 1)]
    for split_key in (train_loc, val_loc, trainval_loc, test_loc)
)

In [370]:
# Each matrix is (feature dimension, number of images in the split);
# the ResNet-101 features are 2048-dimensional
print("Features for train:", train_vec.shape)
print("Features for val:", val_vec.shape)
print("Features for trainval:", trainval_vec.shape)
print("Features for test:", test_vec.shape)

Features for train: (2048, 13460)
Features for val: (2048, 6372)
Features for trainval: (2048, 19832)
Features for test: (2048, 5685)


#### Normalize the vectors

In [371]:
def normalization(vec,mean,std):
    """Standardise `vec`: subtract `mean`, then divide the result by `std`.

    Plain `-` and `/` operators are used, so scalars and NumPy arrays (with
    broadcasting) are both accepted. No guard is applied for a zero `std`.

    Returns the shifted and scaled value.
    """
    return (vec - mean) / std

Each class in the dataset has an attribute description of length $a$. Stacking these vectors column-wise gives the `Signature matrix` $S \in [0, 1]^{a \times z}$ for the $z$ training classes, and $S' \in [0, 1]^{a \times z'}$ for the $z'$ test classes.

In [372]:
# Attribute signature matrix for all classes, one column per class
# (per the original note, normalised to unit L2 norm -- TODO confirm)
signature = att_splits['att']
# Class ids start at 1, so class c is stored in column c-1.
# Order: train, validation, train+validation, test.
train_sig, val_sig, trainval_sig, test_sig = (
    signature[:, class_ids - 1]
    for class_ids in (train_labels_seen, val_labels_unseen,
                      trainval_labels_seen, test_labels_unseen)
)

This is a signature matrix, where the occurrence of each attribute for the corresponding class is given.
For instance, if the classes are `horse` and `zebra` and the corresponding attributes are [domestic_animal, 4_legged, carnivore]:

```
 Horse      Zebra
[0.00354613 0.        ] Domestic_animal
[0.13829921 0.20209503] 4_legged
[0.06560347 0.04155225] carnivore
```

In [373]:
# Peek at attribute rows 3-5 for the first two trainval classes (AWA1 has 85 attributes per class)
print(trainval_sig[3:6,:2])

[[-0.00375358  0.22753174]
 [ 0.0463192   0.01150855]
 [ 0.          0.        ]]


In [374]:
# Each signature matrix is (number of attributes, number of classes in the split)
print("Signature for train:", train_sig.shape)
print("Signature for val:", val_sig.shape)
print("Signature for trainval:", trainval_sig.shape)
print("Signature for test:", test_sig.shape)

Signature for train: (85, 27)
Signature for val: (85, 13)
Signature for trainval: (85, 40)
Signature for test: (85, 10)


In [375]:
# Train / validation: number of examples, then number of distinct classes
m_train, n_val = labels_train.shape[0], labels_val.shape[0]
z_train, z1_val = len(train_labels_seen), len(val_labels_unseen)

# Trainval / test: the same quantities
m_trainval, n_test = labels_trainval.shape[0], labels_test.shape[0]
z_trainval, z1_test = len(trainval_labels_seen), len(test_labels_unseen)

The ground truth is a one-hot encoded vector

In [376]:
# One-hot ground truth: row r gets a single 1 in the column given by the
# class index of example r; every other entry stays 0.

# train split
gt_train = np.zeros((m_train, z_train))
gt_train[np.arange(m_train), labels_train.squeeze()] = 1

# trainval split
gt_trainval = np.zeros((m_trainval, z_trainval))
gt_trainval[np.arange(m_trainval), labels_trainval.squeeze()] = 1

In [377]:
# Show the one-hot row of the first training example (the :100 slice is clipped to the available columns)
gt_train[:1,:100]

array([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [378]:
# Train-split sizes: feature dimensionality and number of attributes
d_train, a_train = train_vec.shape[0], train_sig.shape[0]


In [387]:
import torch
import torch.nn as nn
import torch.optim as optim

# Seeded generator so the random initialisation, and therefore the accuracy
# reported further down, is reproducible across kernel restarts.
SEED = 0
rng = np.random.default_rng(SEED)

# Learnable (features x attributes) matrix, initialised uniformly in [0, 1e-4)
VIter = torch.tensor(rng.random((d_train, a_train))*1e-4, requires_grad=True)

# Training data as tensors: features, class signatures, one-hot targets
X = torch.tensor(train_vec)
S = torch.tensor(train_sig)
Y = torch.tensor(gt_train)

def forward(X,V,S):
    """Compute the bilinear score matrix X^T V S.

    X is transposed over its first two dimensions, then multiplied on the
    right by V and by S using matrix multiplication. As called in this
    notebook, X is (features x examples), V is (features x attributes) and
    S is (attributes x classes), giving one row of class scores per example.
    """
    return X.transpose(0, 1).matmul(V).matmul(S)

#lossFunction
def loss_func(y_pred,Y):
    """Cross-entropy between the score rows `y_pred` and the targets `Y`.

    Delegates to torch's cross-entropy with all default settings (mean
    reduction), so `y_pred` is treated as unnormalised class scores.
    """
    return nn.functional.cross_entropy(y_pred, Y)

#regularization term
def reg (V,X,S,g,l,b):
    """Weighted sum of three squared norms used as the training penalty.

    Returns 10**g * ||V S||^2 + 10**l * ||X^T V||^2 + 10**b * ||V||^2, where
    each norm is `torch.norm` with default arguments. `g`, `l` and `b` are
    therefore base-10 exponents of the three weights, not the weights
    themselves.
    """
    def squared_norm(mat):
        # default torch.norm of the matrix, then squared
        return torch.pow(torch.norm(mat), 2)

    vs_term = (10**g) * squared_norm(torch.matmul(V, S))
    xtv_term = (10**l) * squared_norm(torch.matmul(torch.transpose(X, 0, 1), V))
    v_term = (10**b) * squared_norm(V)
    return vs_term + xtv_term + v_term



# Provisional optimisation settings, picked by hand just to get training running.
# `n_iters` is used instead of `iter` so the Python builtin is not shadowed.
n_iters = 50
optimizer = optim.Adam([VIter],lr =0.01)

# Simple training loop
for step in range(n_iters):
    # Clear the gradient left over from the previous step
    optimizer.zero_grad()

    y_pred = forward(X,VIter,S)
    # The three regulariser exponents (-6, -6, -6) were found by trial and error.
    # TODO: sweep them with a script and pick the best on the validation split.
    loss = loss_func(y_pred,Y) + reg(VIter,X,S,-6,-6,-6)
    loss.backward()
    optimizer.step()




In [380]:
#Used this to check if it diverged to nan or not
#print(VIter)

tensor([[-0.0022,  0.0243,  0.0035,  ..., -0.0102, -0.0059,  0.0078],
        [ 0.0092,  0.0012, -0.0014,  ..., -0.0009, -0.0002, -0.0163],
        [-0.0017, -0.0052, -0.0078,  ...,  0.0012, -0.0005, -0.0084],
        ...,
        [ 0.0155,  0.0060,  0.0006,  ...,  0.0027, -0.0020,  0.0187],
        [-0.0065, -0.0038,  0.0019,  ...,  0.0011, -0.0008, -0.0045],
        [-0.0103, -0.0056, -0.0014,  ..., -0.0102, -0.0059, -0.0112]],
       dtype=torch.float64, requires_grad=True)


In [381]:
#Inference stage
def _mean_per_class_accuracy(scores, y_true, n_classes):
    """Score a (examples x classes) tensor against integer class indices.

    Returns (preds, cm, avg): the arg-max class index of every row, the
    confusion matrix with each row divided by its row sum, and the sum of
    that matrix's diagonal divided by `n_classes`.
    """
    preds = torch.argmax(scores, dim=1)
    cm = confusion_matrix(np.ravel(y_true), preds.numpy())
    cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    avg = sum(cm.diagonal()) / n_classes
    return preds, cm, avg

#Label test data
xPrime = torch.tensor(test_vec)
sPrime = torch.tensor(test_sig)

# --- Iterative solution ---
# NOTE(review): VIter was fitted on the train split only, while the closed-form
# W below is fitted on trainval, so the two accuracies are not a like-for-like
# comparison.
with torch.no_grad():  # scoring only, no gradient tracking needed
    outputs_1 = torch.matmul(torch.matmul(torch.transpose(xPrime,0,1),VIter),sPrime)
preds_1, cm, avg = _mean_per_class_accuracy(outputs_1, labels_test, len(test_labels_unseen))

print("The iterative accuracy is:", avg * 100)

# --- Closed-form solution on the trainval set ---
d_trainval = trainval_vec.shape[0]
a_trainval = trainval_sig.shape[0]
# Base-10 exponents of the two ridge terms added to the Gram matrices below
gamm1 = 3
alph1 = 0

# W = (X X^T + 10^alph1 I)^-1  X Y S^T  (S S^T + 10^gamm1 I)^-1
part_1_test = np.linalg.pinv(np.matmul(trainval_vec, trainval_vec.transpose()) + (10**alph1)*np.eye(d_trainval))
part_0_test = np.matmul(np.matmul(trainval_vec,gt_trainval),trainval_sig.transpose())
part_2_test = np.linalg.pinv(np.matmul(trainval_sig, trainval_sig.transpose()) + (10**gamm1)*np.eye(a_trainval))

W = np.matmul(np.matmul(part_1_test,part_0_test),part_2_test)
W = torch.tensor(W)
#calculates the outputs
outputs_1 = torch.matmul(torch.matmul(torch.transpose(xPrime,0,1),W),sPrime)
preds_1, cm, avg = _mean_per_class_accuracy(outputs_1, labels_test, len(test_labels_unseen))

print("The closed form accuracy is:", avg * 100)


The iterative accuracy is: 57.0401978231754
The closed form accuracy is: 46.75198175168951
