In [270]:
import numpy as np
import os
import scipy.io
from sklearn.metrics import classification_report,confusion_matrix

In [271]:
#Please add the folder name of the dataset to run it on different dataset.
dataset = 'CUB'

In [272]:
#Replace Path to run on your own system
res101 = scipy.io.loadmat('C:/Users/Zach/Documents/NorthEastern/SmallDataMachineLearning/Code/ESZSL/Data/xlsa17/data/'+dataset+'/res101.mat')
att_splits = scipy.io.loadmat('C:/Users/Zach/Documents/NorthEastern/SmallDataMachineLearning/Code/ESZSL/Data/xlsa17/data/'+dataset+'/att_splits.mat')

In [273]:
res101.keys()

dict_keys(['__header__', '__version__', '__globals__', 'image_files', 'features', 'labels'])

In [274]:
att_splits.keys()

dict_keys(['__header__', '__version__', '__globals__', 'allclasses_names', 'att', 'original_att', 'test_seen_loc', 'test_unseen_loc', 'train_loc', 'trainval_loc', 'val_loc'])

In [275]:
#Using the correct naming conventions to get the loctions
trainval_loc = 'trainval_loc'
train_loc = 'train_loc'
val_loc = 'val_loc'
test_loc = 'test_unseen_loc'

We need the corresponding ground-truth labels/classes for each training example for all our train, val, trainval and test set according to the split locations provided.
In this example we have used the `CUB` dataset which has 200 unique classes overall.

In [276]:
#RES101 is Resnet 101, this is the output from CNN
labels = res101['labels']
#Array of training labels (class label)
labels_train = labels[np.squeeze(att_splits[train_loc]-1)]
#array of validation labels
labels_val = labels[np.squeeze(att_splits[val_loc]-1)]
#train + validation labels
labels_trainval = labels[np.squeeze(att_splits[trainval_loc]-1)]
#test labels
labels_test = labels[np.squeeze(att_splits[test_loc]-1)]



In [365]:
print(labels_test[:10])

[[32]
 [32]
 [32]
 [32]
 [32]
 [32]
 [32]
 [32]
 [32]
 [32]]


In [277]:
#Checking first ten labels
labels_train[:10,:]

array([[197],
       [198],
       [ 31],
       [ 25],
       [ 22],
       [ 86],
       [ 28],
       [136],
       [190],
       [177]], dtype=uint8)

In [278]:
#list all unique labels, should be 200 for birds
unique_labels = np.unique(labels)
unique_labels

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
        92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104,
       105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
       118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130,
       131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
       144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156,
       157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
       170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 18

In a typical zero-shot learning scenario, there are no overlapping classes between training and testing phase, i.e the train classes are completely different from the test classes. So let us verify if there are any overlapping classes in the test and train scenario.
- During training phase we have `z` classes
- During the testing phase we have `z'` classes

In [279]:
train_labels_seen = np.unique(labels_train)
val_labels_unseen = np.unique(labels_val)
trainval_labels_seen = np.unique(labels_trainval)
test_labels_unseen = np.unique(labels_test)

In [280]:
print("Number of overlapping classes between train and val:",len(set(train_labels_seen).intersection(set(val_labels_unseen))))
print("Number of overlapping classes between trainval and test:",len(set(trainval_labels_seen).intersection(set(test_labels_unseen))))

Number of overlapping classes between train and val: 0
Number of overlapping classes between trainval and test: 0


In [281]:
#label_train now should be an index into the train_labels seen
#example: anything that had 200 as a label, now is 99 which is the last index in train_labels seen
#which aligns with label 200
i = 0
for labels in train_labels_seen:
    labels_train[labels_train == labels] = i    
    i = i+1
j = 0
for labels in val_labels_unseen:
    labels_val[labels_val == labels] = j
    j = j+1
k = 0
for labels in trainval_labels_seen:
    labels_trainval[labels_trainval == labels] = k
    k = k+1
l = 0
for labels in test_labels_unseen:
    labels_test[labels_test == labels] = l
    l = l+1





Let us denote the features X ∈ [d×m] available at training stage, where d is the dimensionality
of the data, and m is the number of instances. We are useing resnet features which are extracted from `CUB` dataset.

In [282]:
#Get all features of all data
X_features = res101['features']
#select those for the training
train_vec = X_features[:,np.squeeze(att_splits[train_loc]-1)]
#validation set
val_vec = X_features[:,np.squeeze(att_splits[val_loc]-1)]
#train+validation
trainval_vec = X_features[:,np.squeeze(att_splits[trainval_loc]-1)]
#last the test features
test_vec = X_features[:,np.squeeze(att_splits[test_loc]-1)]

In [283]:
#looks like resnet101 outputs 2048 features
print("Features for train:", train_vec.shape)
print("Features for val:", val_vec.shape)
print("Features for trainval:", trainval_vec.shape)
print("Features for test:", test_vec.shape)

Features for train: (2048, 4702)
Features for val: (2048, 2355)
Features for trainval: (2048, 7057)
Features for test: (2048, 2967)


#### Normalize the vectors

In [340]:
def normalization(vec,mean,std):
    sol = vec - mean
    sol1 = sol/std
    return sol1

Each of the classes in the dataset have an attribute (a) description. This vector is known as the `Signature matrix` of dimension S ∈ [0, 1]a×z. For training stage there are z classes and z' classes  for test S ∈ [0, 1]a×z'.

In [285]:
#Attribute Signature matrix (all)
#All normalized to have unit 12 norm
signature = att_splits['att']
#getting the training,validation,train+val,and the test signatures
train_sig = signature[:,(train_labels_seen)-1]
val_sig = signature[:,(val_labels_unseen)-1]
trainval_sig = signature[:,(trainval_labels_seen)-1]
test_sig = signature[:,(test_labels_unseen)-1]

This is a signature matrix, where the occurance of an attribute corresponding to the class is give.
For instance, if the classes are `horse` and `zebra` and the corresponding attributes are [wild_animal, 4_legged, carnivore]

```
 Horse      Zebra
[0.00354613 0.        ] Domestic_animal
[0.13829921 0.20209503] 4_legged
[0.06560347 0.04155225] carnivore
```

In [338]:
#for the Caltech Birds(CUB) there are 311 attributes
print(trainval_sig[3:6,:2])

[[0.00354613 0.        ]
 [0.13829921 0.20209503]
 [0.06560347 0.04155225]]


In [289]:
print("Signature for train:", train_sig.shape)
print("Signature for val:", val_sig.shape)
print("Signature for trainval:", trainval_sig.shape)
print("Signature for test:", test_sig.shape)

Signature for train: (312, 100)
Signature for val: (312, 50)
Signature for trainval: (312, 150)
Signature for test: (312, 50)


In [290]:
#params for train and val set
m_train = labels_train.shape[0]
n_val = labels_val.shape[0]
z_train = len(train_labels_seen)
z1_val = len(val_labels_unseen)

#params for trainval and test set
m_trainval = labels_trainval.shape[0]
n_test = labels_test.shape[0]
z_trainval = len(trainval_labels_seen)
z1_test = len(test_labels_unseen)

The ground truth is a one-hot encoded vector

In [307]:
#Setting ground truths for the the classes
#ground truth for train 
gt_train = 0*np.ones((m_train, z_train))
gt_train[np.arange(m_train), np.squeeze(labels_train)] = 1

#grountruth for trainval 
gt_trainval = 0*np.ones((m_trainval, z_trainval))
gt_trainval[np.arange(m_trainval), np.squeeze(labels_trainval)] = 1

In [308]:
gt_train[:1,:100]

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        1., 0., 0., 0.]])

In [394]:
#train set dimensions
d_train = train_vec.shape[0] #num of dimensions of features
a_train = train_sig.shape[0] #num of attributes

#Weights matrix
V = np.random.rand(d_train,a_train)
print(V)

[[0.24825399 0.61880022 0.66733556 ... 0.21101318 0.13566311 0.63188729]
 [0.70379073 0.08580839 0.77468547 ... 0.8497407  0.53233404 0.54755099]
 [0.47677835 0.57910177 0.24537089 ... 0.60384865 0.77892158 0.39479044]
 ...
 [0.28295789 0.68949735 0.82993046 ... 0.87317053 0.89815821 0.61049304]
 [0.79214452 0.71020766 0.14279853 ... 0.98062901 0.34519473 0.00806523]
 [0.92462524 0.31841636 0.05503308 ... 0.6702936  0.38139726 0.82884936]]


Need to add the formula in here

In [450]:
import torch
import torch.nn as nn
from sklearn.preprocessing import normalize 

V = np.random.rand(d_train,a_train)


#need my values X,V,S, and Y
V = torch.tensor(normalize(V),requires_grad=True)
#going to reassign
X = torch.tensor(normalize(trainval_vec))
S = torch.tensor(normalize(trainval_sig))
Y = torch.tensor(normalize(gt_trainval))


def forward(X,V,S):
    xT = torch.transpose(X,0,1)
    mult1 = torch.matmul(xT,V)
    ans = torch.matmul(mult1,S)
    return ans

#regularization term
def reg (V,X,S,g,l,b):
    part1 = g*(torch.linalg.norm(torch.matmul(V,S))**2)
    part2 = l * (torch.linalg.norm(torch.matmul(torch.transpose(X,0,1),V))**2)
    part3 = b * (torch.linalg.norm(V)**2)
    return part1 + part2 + part3

def calcLoss(X,Y,V,S,y_pred):
    mse_loss = nn.MSELoss()
    lossVal = mse_loss(y_pred,Y)
    regValue = reg(V,X,S,1,3,3)
    return lossVal + regValue



#Test values, just getting it to work
iter = 100
step = 0.1



for i in range(1):
    y_pred = forward(X,V,S)
    mse_loss = nn.MSELoss()
    print(mse_loss(y_pred,Y))
    print(reg(V,X,S,1,3,3))
    loss = mse_loss(y_pred,Y) + reg(V,X,S,1,3,3)
    loss.backward()
    V.data = V.data - step * V.grad.data
    V.grad.data.zero_()



tensor(86.8498, dtype=torch.float64, grad_fn=<MseLossBackward0>)
tensor(3214744.8311, dtype=torch.float64, grad_fn=<AddBackward0>)


In [451]:
print(V)

tensor([[-23.5793, -24.1631, -23.7921,  ..., -24.0764, -24.6045, -25.1257],
        [-22.1217, -22.6362, -22.3633,  ..., -22.7220, -23.1262, -23.4751],
        [-25.7809, -26.3437, -26.0187,  ..., -26.4470, -26.8665, -27.2526],
        ...,
        [-25.1067, -25.7430, -25.3959,  ..., -25.6406, -26.0979, -26.5328],
        [-19.6081, -20.1427, -19.8656,  ..., -20.1285, -20.5521, -20.9343],
        [-19.5461, -20.1518, -19.8740,  ..., -20.1180, -20.5359, -20.8851]],
       dtype=torch.float64, requires_grad=True)


In [453]:
#Inference stage
outPut = torch.tensor([])

#Label test data
xPrime = torch.tensor(normalize(test_vec))
sPrime = torch.tensor(normalize(test_sig))
#labels_test is indexes to test_labels_seen
#yTest = torch.tensor(labels_test)

#calculates the outputs
outputs_1 = torch.matmul(torch.matmul(torch.transpose(xPrime,0,1),V),sPrime)
#calculating the predictions
preds_1 = torch.tensor([torch.argmax(output) for output in outputs_1])		

#creating confusion?
cm = confusion_matrix(labels_test, preds_1)
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
avg = sum(cm.diagonal())/len(test_labels_unseen)

total = len(preds_1)
correct = 0
for i in range(total):      
    if preds_1[i] == torch.tensor(labels_test[i]):
        correct = correct + 1

print("The top accuracy is:", 100 * correct / (1.0 * total))


The top accuracy is: 2.0222446916076846
